Fixed database typo and removed unnecessary class identifier.
This commit is contained in:
parent
00ad49a143
commit
45fb349a7d
5098 changed files with 952558 additions and 85 deletions
|
@ -0,0 +1,22 @@
|
|||
""" This module provides the functions for node classification problem.
|
||||
|
||||
The functions in this module are not imported
|
||||
into the top level `networkx` namespace.
|
||||
You can access these functions by importing
|
||||
the `networkx.algorithms.node_classification` modules,
|
||||
then accessing the functions as attributes of `node_classification`.
|
||||
For example:
|
||||
|
||||
>>> from networkx.algorithms import node_classification
|
||||
>>> G = nx.path_graph(4)
|
||||
>>> G.edges()
|
||||
EdgeView([(0, 1), (1, 2), (2, 3)])
|
||||
>>> G.nodes[0]["label"] = "A"
|
||||
>>> G.nodes[3]["label"] = "B"
|
||||
>>> node_classification.harmonic_function(G)
|
||||
['A', 'A', 'B', 'B']
|
||||
|
||||
"""
|
||||
|
||||
from networkx.algorithms.node_classification.hmn import *
|
||||
from networkx.algorithms.node_classification.lgc import *
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,145 @@
|
|||
"""Function for computing Harmonic function algorithm by Zhu et al.
|
||||
|
||||
References
|
||||
----------
|
||||
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
|
||||
Semi-supervised learning using gaussian fields and harmonic functions.
|
||||
In ICML (Vol. 3, pp. 912-919).
|
||||
"""
|
||||
import networkx as nx
|
||||
|
||||
from networkx.utils.decorators import not_implemented_for
|
||||
from networkx.algorithms.node_classification.utils import (
|
||||
_get_label_info,
|
||||
_init_label_matrix,
|
||||
_propagate,
|
||||
_predict,
|
||||
)
|
||||
|
||||
__all__ = ["harmonic_function"]
|
||||
|
||||
|
||||
@not_implemented_for("directed")
|
||||
def harmonic_function(G, max_iter=30, label_name="label"):
|
||||
"""Node classification by Harmonic function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
G : NetworkX Graph
|
||||
max_iter : int
|
||||
maximum number of iterations allowed
|
||||
label_name : string
|
||||
name of target labels to predict
|
||||
|
||||
Returns
|
||||
----------
|
||||
predicted : array, shape = [n_samples]
|
||||
Array of predicted labels
|
||||
|
||||
Raises
|
||||
----------
|
||||
NetworkXError
|
||||
If no nodes on `G` has `label_name`.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from networkx.algorithms import node_classification
|
||||
>>> G = nx.path_graph(4)
|
||||
>>> G.nodes[0]["label"] = "A"
|
||||
>>> G.nodes[3]["label"] = "B"
|
||||
>>> G.nodes(data=True)
|
||||
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
|
||||
>>> G.edges()
|
||||
EdgeView([(0, 1), (1, 2), (2, 3)])
|
||||
>>> predicted = node_classification.harmonic_function(G)
|
||||
>>> predicted
|
||||
['A', 'A', 'B', 'B']
|
||||
|
||||
References
|
||||
----------
|
||||
Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
|
||||
Semi-supervised learning using gaussian fields and harmonic functions.
|
||||
In ICML (Vol. 3, pp. 912-919).
|
||||
"""
|
||||
try:
|
||||
import numpy as np
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"harmonic_function() requires numpy: http://numpy.org/ "
|
||||
) from e
|
||||
try:
|
||||
from scipy import sparse
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"harmonic_function() requires scipy: http://scipy.org/ "
|
||||
) from e
|
||||
|
||||
def _build_propagation_matrix(X, labels):
|
||||
"""Build propagation matrix of Harmonic function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Adjacency matrix
|
||||
labels : array, shape = [n_samples, 2]
|
||||
Array of pairs of node id and label id
|
||||
|
||||
Returns
|
||||
----------
|
||||
P : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Propagation matrix
|
||||
|
||||
"""
|
||||
degrees = X.sum(axis=0).A[0]
|
||||
degrees[degrees == 0] = 1 # Avoid division by 0
|
||||
D = sparse.diags((1.0 / degrees), offsets=0)
|
||||
P = D.dot(X).tolil()
|
||||
P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes
|
||||
return P
|
||||
|
||||
def _build_base_matrix(X, labels, n_classes):
|
||||
"""Build base matrix of Harmonic function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Adjacency matrix
|
||||
labels : array, shape = [n_samples, 2]
|
||||
Array of pairs of node id and label id
|
||||
n_classes : integer
|
||||
The number of classes (distinct labels) on the input graph
|
||||
|
||||
Returns
|
||||
----------
|
||||
B : array, shape = [n_samples, n_classes]
|
||||
Base matrix
|
||||
"""
|
||||
n_samples = X.shape[0]
|
||||
B = np.zeros((n_samples, n_classes))
|
||||
B[labels[:, 0], labels[:, 1]] = 1
|
||||
return B
|
||||
|
||||
X = nx.to_scipy_sparse_matrix(G) # adjacency matrix
|
||||
labels, label_dict = _get_label_info(G, label_name)
|
||||
|
||||
if labels.shape[0] == 0:
|
||||
raise nx.NetworkXError(
|
||||
"No node on the input graph is labeled by '" + label_name + "'."
|
||||
)
|
||||
|
||||
n_samples = X.shape[0]
|
||||
n_classes = label_dict.shape[0]
|
||||
|
||||
F = _init_label_matrix(n_samples, n_classes)
|
||||
|
||||
P = _build_propagation_matrix(X, labels)
|
||||
B = _build_base_matrix(X, labels, n_classes)
|
||||
|
||||
remaining_iter = max_iter
|
||||
while remaining_iter > 0:
|
||||
F = _propagate(P, F, B)
|
||||
remaining_iter -= 1
|
||||
|
||||
predicted = _predict(F, label_dict)
|
||||
|
||||
return predicted
|
|
@ -0,0 +1,151 @@
|
|||
"""Function for computing Local and global consistency algorithm by Zhou et al.
|
||||
|
||||
References
|
||||
----------
|
||||
Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
|
||||
Learning with local and global consistency.
|
||||
Advances in neural information processing systems, 16(16), 321-328.
|
||||
"""
|
||||
import networkx as nx
|
||||
|
||||
from networkx.utils.decorators import not_implemented_for
|
||||
from networkx.algorithms.node_classification.utils import (
|
||||
_get_label_info,
|
||||
_init_label_matrix,
|
||||
_propagate,
|
||||
_predict,
|
||||
)
|
||||
|
||||
__all__ = ["local_and_global_consistency"]
|
||||
|
||||
|
||||
@not_implemented_for("directed")
|
||||
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
|
||||
"""Node classification by Local and Global Consistency
|
||||
|
||||
Parameters
|
||||
----------
|
||||
G : NetworkX Graph
|
||||
alpha : float
|
||||
Clamping factor
|
||||
max_iter : int
|
||||
Maximum number of iterations allowed
|
||||
label_name : string
|
||||
Name of target labels to predict
|
||||
|
||||
Returns
|
||||
----------
|
||||
predicted : array, shape = [n_samples]
|
||||
Array of predicted labels
|
||||
|
||||
Raises
|
||||
------
|
||||
NetworkXError
|
||||
If no nodes on `G` has `label_name`.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from networkx.algorithms import node_classification
|
||||
>>> G = nx.path_graph(4)
|
||||
>>> G.nodes[0]["label"] = "A"
|
||||
>>> G.nodes[3]["label"] = "B"
|
||||
>>> G.nodes(data=True)
|
||||
NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
|
||||
>>> G.edges()
|
||||
EdgeView([(0, 1), (1, 2), (2, 3)])
|
||||
>>> predicted = node_classification.local_and_global_consistency(G)
|
||||
>>> predicted
|
||||
['A', 'A', 'B', 'B']
|
||||
|
||||
|
||||
References
|
||||
----------
|
||||
Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
|
||||
Learning with local and global consistency.
|
||||
Advances in neural information processing systems, 16(16), 321-328.
|
||||
"""
|
||||
try:
|
||||
import numpy as np
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"local_and_global_consistency() requires numpy: ", "http://numpy.org/ "
|
||||
) from e
|
||||
try:
|
||||
from scipy import sparse
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"local_and_global_consistensy() requires scipy: ", "http://scipy.org/ "
|
||||
) from e
|
||||
|
||||
def _build_propagation_matrix(X, labels, alpha):
|
||||
"""Build propagation matrix of Local and global consistency
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Adjacency matrix
|
||||
labels : array, shape = [n_samples, 2]
|
||||
Array of pairs of node id and label id
|
||||
alpha : float
|
||||
Clamping factor
|
||||
|
||||
Returns
|
||||
----------
|
||||
S : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Propagation matrix
|
||||
|
||||
"""
|
||||
degrees = X.sum(axis=0).A[0]
|
||||
degrees[degrees == 0] = 1 # Avoid division by 0
|
||||
D2 = np.sqrt(sparse.diags((1.0 / degrees), offsets=0))
|
||||
S = alpha * D2.dot(X).dot(D2)
|
||||
return S
|
||||
|
||||
def _build_base_matrix(X, labels, alpha, n_classes):
|
||||
"""Build base matrix of Local and global consistency
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Adjacency matrix
|
||||
labels : array, shape = [n_samples, 2]
|
||||
Array of pairs of node id and label id
|
||||
alpha : float
|
||||
Clamping factor
|
||||
n_classes : integer
|
||||
The number of classes (distinct labels) on the input graph
|
||||
|
||||
Returns
|
||||
----------
|
||||
B : array, shape = [n_samples, n_classes]
|
||||
Base matrix
|
||||
"""
|
||||
|
||||
n_samples = X.shape[0]
|
||||
B = np.zeros((n_samples, n_classes))
|
||||
B[labels[:, 0], labels[:, 1]] = 1 - alpha
|
||||
return B
|
||||
|
||||
X = nx.to_scipy_sparse_matrix(G) # adjacency matrix
|
||||
labels, label_dict = _get_label_info(G, label_name)
|
||||
|
||||
if labels.shape[0] == 0:
|
||||
raise nx.NetworkXError(
|
||||
"No node on the input graph is labeled by '" + label_name + "'."
|
||||
)
|
||||
|
||||
n_samples = X.shape[0]
|
||||
n_classes = label_dict.shape[0]
|
||||
F = _init_label_matrix(n_samples, n_classes)
|
||||
|
||||
P = _build_propagation_matrix(X, labels, alpha)
|
||||
B = _build_base_matrix(X, labels, alpha, n_classes)
|
||||
|
||||
remaining_iter = max_iter
|
||||
while remaining_iter > 0:
|
||||
F = _propagate(P, F, B)
|
||||
remaining_iter -= 1
|
||||
|
||||
predicted = _predict(F, label_dict)
|
||||
|
||||
return predicted
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,76 @@
|
|||
import pytest
|
||||
|
||||
numpy = pytest.importorskip("numpy")
|
||||
scipy = pytest.importorskip("scipy")
|
||||
|
||||
import networkx as nx
|
||||
from networkx.algorithms import node_classification
|
||||
|
||||
|
||||
class TestHarmonicFunction:
|
||||
def test_path_graph(self):
|
||||
G = nx.path_graph(4)
|
||||
label_name = "label"
|
||||
G.nodes[0][label_name] = "A"
|
||||
G.nodes[3][label_name] = "B"
|
||||
predicted = node_classification.harmonic_function(G, label_name=label_name)
|
||||
assert predicted[0] == "A"
|
||||
assert predicted[1] == "A"
|
||||
assert predicted[2] == "B"
|
||||
assert predicted[3] == "B"
|
||||
|
||||
def test_no_labels(self):
|
||||
with pytest.raises(nx.NetworkXError):
|
||||
G = nx.path_graph(4)
|
||||
node_classification.harmonic_function(G)
|
||||
|
||||
def test_no_nodes(self):
|
||||
with pytest.raises(nx.NetworkXError):
|
||||
G = nx.Graph()
|
||||
node_classification.harmonic_function(G)
|
||||
|
||||
def test_no_edges(self):
|
||||
with pytest.raises(nx.NetworkXError):
|
||||
G = nx.Graph()
|
||||
G.add_node(1)
|
||||
G.add_node(2)
|
||||
node_classification.harmonic_function(G)
|
||||
|
||||
def test_digraph(self):
|
||||
with pytest.raises(nx.NetworkXNotImplemented):
|
||||
G = nx.DiGraph()
|
||||
G.add_edge(0, 1)
|
||||
G.add_edge(1, 2)
|
||||
G.add_edge(2, 3)
|
||||
label_name = "label"
|
||||
G.nodes[0][label_name] = "A"
|
||||
G.nodes[3][label_name] = "B"
|
||||
node_classification.harmonic_function(G)
|
||||
|
||||
def test_one_labeled_node(self):
|
||||
G = nx.path_graph(4)
|
||||
label_name = "label"
|
||||
G.nodes[0][label_name] = "A"
|
||||
predicted = node_classification.harmonic_function(G, label_name=label_name)
|
||||
assert predicted[0] == "A"
|
||||
assert predicted[1] == "A"
|
||||
assert predicted[2] == "A"
|
||||
assert predicted[3] == "A"
|
||||
|
||||
def test_nodes_all_labeled(self):
|
||||
G = nx.karate_club_graph()
|
||||
label_name = "club"
|
||||
predicted = node_classification.harmonic_function(G, label_name=label_name)
|
||||
for i in range(len(G)):
|
||||
assert predicted[i] == G.nodes[i][label_name]
|
||||
|
||||
def test_labeled_nodes_are_not_changed(self):
|
||||
G = nx.karate_club_graph()
|
||||
label_name = "club"
|
||||
label_removed = {0, 1, 2, 3, 4, 5, 6, 7}
|
||||
for i in label_removed:
|
||||
del G.nodes[i][label_name]
|
||||
predicted = node_classification.harmonic_function(G, label_name=label_name)
|
||||
label_not_removed = set(list(range(len(G)))) - label_removed
|
||||
for i in label_not_removed:
|
||||
assert predicted[i] == G.nodes[i][label_name]
|
|
@ -0,0 +1,72 @@
|
|||
import pytest
|
||||
|
||||
numpy = pytest.importorskip("numpy")
|
||||
scipy = pytest.importorskip("scipy")
|
||||
|
||||
|
||||
import networkx as nx
|
||||
from networkx.algorithms import node_classification
|
||||
|
||||
|
||||
class TestLocalAndGlobalConsistency:
|
||||
def test_path_graph(self):
|
||||
G = nx.path_graph(4)
|
||||
label_name = "label"
|
||||
G.nodes[0][label_name] = "A"
|
||||
G.nodes[3][label_name] = "B"
|
||||
predicted = node_classification.local_and_global_consistency(
|
||||
G, label_name=label_name
|
||||
)
|
||||
assert predicted[0] == "A"
|
||||
assert predicted[1] == "A"
|
||||
assert predicted[2] == "B"
|
||||
assert predicted[3] == "B"
|
||||
|
||||
def test_no_labels(self):
|
||||
with pytest.raises(nx.NetworkXError):
|
||||
G = nx.path_graph(4)
|
||||
node_classification.local_and_global_consistency(G)
|
||||
|
||||
def test_no_nodes(self):
|
||||
with pytest.raises(nx.NetworkXError):
|
||||
G = nx.Graph()
|
||||
node_classification.local_and_global_consistency(G)
|
||||
|
||||
def test_no_edges(self):
|
||||
with pytest.raises(nx.NetworkXError):
|
||||
G = nx.Graph()
|
||||
G.add_node(1)
|
||||
G.add_node(2)
|
||||
node_classification.local_and_global_consistency(G)
|
||||
|
||||
def test_digraph(self):
|
||||
with pytest.raises(nx.NetworkXNotImplemented):
|
||||
G = nx.DiGraph()
|
||||
G.add_edge(0, 1)
|
||||
G.add_edge(1, 2)
|
||||
G.add_edge(2, 3)
|
||||
label_name = "label"
|
||||
G.nodes[0][label_name] = "A"
|
||||
G.nodes[3][label_name] = "B"
|
||||
node_classification.harmonic_function(G)
|
||||
|
||||
def test_one_labeled_node(self):
|
||||
G = nx.path_graph(4)
|
||||
label_name = "label"
|
||||
G.nodes[0][label_name] = "A"
|
||||
predicted = node_classification.local_and_global_consistency(
|
||||
G, label_name=label_name
|
||||
)
|
||||
assert predicted[0] == "A"
|
||||
assert predicted[1] == "A"
|
||||
assert predicted[2] == "A"
|
||||
assert predicted[3] == "A"
|
||||
|
||||
def test_nodes_all_labeled(self):
|
||||
G = nx.karate_club_graph()
|
||||
label_name = "club"
|
||||
predicted = node_classification.local_and_global_consistency(
|
||||
G, alpha=0, label_name=label_name
|
||||
)
|
||||
for i in range(len(G)):
|
||||
assert predicted[i] == G.nodes[i][label_name]
|
|
@ -0,0 +1,99 @@
|
|||
def _propagate(P, F, B):
|
||||
"""Propagate labels by one step
|
||||
|
||||
Parameters
|
||||
----------
|
||||
P : scipy sparse matrix, shape = [n_samples, n_samples]
|
||||
Propagation matrix
|
||||
F : numpy array, shape = [n_samples, n_classes]
|
||||
Label matrix
|
||||
B : numpy array, shape = [n_samples, n_classes]
|
||||
Base matrix
|
||||
|
||||
Returns
|
||||
----------
|
||||
F_new : array, shape = [n_samples, n_classes]
|
||||
Label matrix
|
||||
"""
|
||||
F_new = P.dot(F) + B
|
||||
return F_new
|
||||
|
||||
|
||||
def _get_label_info(G, label_name):
|
||||
"""Get and return information of labels from the input graph
|
||||
|
||||
Parameters
|
||||
----------
|
||||
G : Network X graph
|
||||
label_name : string
|
||||
Name of the target label
|
||||
|
||||
Returns
|
||||
----------
|
||||
labels : numpy array, shape = [n_labeled_samples, 2]
|
||||
Array of pairs of labeled node ID and label ID
|
||||
label_dict : numpy array, shape = [n_classes]
|
||||
Array of labels
|
||||
i-th element contains the label corresponding label ID `i`
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
labels = []
|
||||
label_to_id = {}
|
||||
lid = 0
|
||||
for i, n in enumerate(G.nodes(data=True)):
|
||||
if label_name in n[1]:
|
||||
label = n[1][label_name]
|
||||
if label not in label_to_id:
|
||||
label_to_id[label] = lid
|
||||
lid += 1
|
||||
labels.append([i, label_to_id[label]])
|
||||
labels = np.array(labels)
|
||||
label_dict = np.array(
|
||||
[label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])]
|
||||
)
|
||||
return (labels, label_dict)
|
||||
|
||||
|
||||
def _init_label_matrix(n_samples, n_classes):
|
||||
"""Create and return zero matrix
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_samples : integer
|
||||
The number of nodes (samples) on the input graph
|
||||
n_classes : integer
|
||||
The number of classes (distinct labels) on the input graph
|
||||
|
||||
Returns
|
||||
----------
|
||||
F : numpy array, shape = [n_samples, n_classes]
|
||||
Label matrix
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
F = np.zeros((n_samples, n_classes))
|
||||
return F
|
||||
|
||||
|
||||
def _predict(F, label_dict):
|
||||
"""Predict labels by learnt label matrix
|
||||
|
||||
Parameters
|
||||
----------
|
||||
F : numpy array, shape = [n_samples, n_classes]
|
||||
Learnt (resulting) label matrix
|
||||
label_dict : numpy array, shape = [n_classes]
|
||||
Array of labels
|
||||
i-th element contains the label corresponding label ID `i`
|
||||
|
||||
Returns
|
||||
----------
|
||||
predicted : numpy array, shape = [n_samples]
|
||||
Array of predicted labels
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
predicted_label_ids = np.argmax(F, axis=1)
|
||||
predicted = label_dict[predicted_label_ids].tolist()
|
||||
return predicted
|
Loading…
Add table
Add a link
Reference in a new issue