Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/init.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/init.py
@ -0,0 +1,22 @@
+"""This module provides functions for the node classification problem.
+
+The functions in this module are not imported
+into the top-level `networkx` namespace.
+You can access these functions by importing
+the `networkx.algorithms.node_classification` module,
+then accessing the functions as attributes of `node_classification`.
+For example:
+
+  >>> from networkx.algorithms import node_classification
+  >>> G = nx.path_graph(4)
+  >>> G.edges()
+  EdgeView([(0, 1), (1, 2), (2, 3)])
+  >>> G.nodes[0]["label"] = "A"
+  >>> G.nodes[3]["label"] = "B"
+  >>> node_classification.harmonic_function(G)
+  ['A', 'A', 'B', 'B']
+
+"""
+
+from networkx.algorithms.node_classification.hmn import *
+from networkx.algorithms.node_classification.lgc import *
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/hmn.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/hmn.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/lgc.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/lgc.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/utils.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/pycache/utils.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/hmn.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/hmn.py
@ -0,0 +1,145 @@
+"""Function for node classification using the Harmonic function algorithm by Zhu et al.
+
+References
+----------
+Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
+Semi-supervised learning using gaussian fields and harmonic functions.
+In ICML (Vol. 3, pp. 912-919).
+"""
+import networkx as nx
+
+from networkx.utils.decorators import not_implemented_for
+from networkx.algorithms.node_classification.utils import (
+    _get_label_info,
+    _init_label_matrix,
+    _propagate,
+    _predict,
+)
+
+__all__ = ["harmonic_function"]
+
+
+@not_implemented_for("directed")
+def harmonic_function(G, max_iter=30, label_name="label"):
+    """Classify the nodes of `G` with the Harmonic function algorithm.
+
+    The label matrix starts at zero and is updated `max_iter` times with
+    ``F = P.dot(F) + B``. ``P`` is the degree-normalised adjacency matrix
+    whose rows for labeled nodes are zeroed, and ``B`` holds a 1 at each
+    (labeled node, class) position, so labeled nodes keep their class.
+
+    Parameters
+    ----------
+    G : NetworkX Graph
+        Undirected graph; some nodes carry the attribute `label_name`.
+    max_iter : int
+        Number of propagation steps to run.
+    label_name : string
+        Name of the node attribute that holds the target label.
+
+    Returns
+    -------
+    predicted : list, length = n_samples
+        Predicted label for each node.
+
+    Raises
+    ------
+    NetworkXError
+        If no node of `G` has the attribute `label_name`.
+    ImportError
+        If numpy or scipy cannot be imported.
+
+    Examples
+    --------
+    >>> from networkx.algorithms import node_classification
+    >>> G = nx.path_graph(4)
+    >>> G.nodes[0]["label"] = "A"
+    >>> G.nodes[3]["label"] = "B"
+    >>> G.nodes(data=True)
+    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
+    >>> G.edges()
+    EdgeView([(0, 1), (1, 2), (2, 3)])
+    >>> predicted = node_classification.harmonic_function(G)
+    >>> predicted
+    ['A', 'A', 'B', 'B']
+
+    References
+    ----------
+    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
+    Semi-supervised learning using gaussian fields and harmonic functions.
+    In ICML (Vol. 3, pp. 912-919).
+    """
+    try:
+        import numpy as np
+    except ImportError as e:
+        raise ImportError(
+            "harmonic_function() requires numpy: http://numpy.org/ "
+        ) from e
+    try:
+        from scipy import sparse
+    except ImportError as e:
+        raise ImportError(
+            "harmonic_function() requires scipy: http://scipy.org/ "
+        ) from e
+
+    adjacency = nx.to_scipy_sparse_matrix(G)
+    labels, label_dict = _get_label_info(G, label_name)
+
+    if labels.shape[0] == 0:
+        raise nx.NetworkXError(
+            "No node on the input graph is labeled by '" + label_name + "'."
+        )
+
+    n_samples = adjacency.shape[0]
+    n_classes = label_dict.shape[0]
+    labeled_rows = labels[:, 0]  # positions of the labeled nodes
+    class_ids = labels[:, 1]  # class id of each labeled node
+
+    label_matrix = _init_label_matrix(n_samples, n_classes)
+
+    # Propagation matrix: row-normalise by degree, then zero the rows of
+    # labeled nodes so that propagation never overwrites them.
+    degrees = adjacency.sum(axis=0).A[0]
+    degrees[degrees == 0] = 1  # isolated nodes: avoid division by 0
+    inverse_degree = sparse.diags((1.0 / degrees), offsets=0)
+    propagation = inverse_degree.dot(adjacency).tolil()
+    propagation[labeled_rows] = 0
+
+    # Base matrix: one-hot rows for labeled nodes, zero rows elsewhere.
+    base = np.zeros((n_samples, n_classes))
+    base[labeled_rows, class_ids] = 1
+
+    iterations_left = max_iter
+    while iterations_left > 0:
+        label_matrix = _propagate(propagation, label_matrix, base)
+        iterations_left -= 1
+
+    return _predict(label_matrix, label_dict)
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/lgc.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/lgc.py
@ -0,0 +1,151 @@
+"""Function for node classification using the Local and global consistency algorithm by Zhou et al.
+
+References
+----------
+Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
+Learning with local and global consistency.
+Advances in neural information processing systems, 16(16), 321-328.
+"""
+import networkx as nx
+
+from networkx.utils.decorators import not_implemented_for
+from networkx.algorithms.node_classification.utils import (
+    _get_label_info,
+    _init_label_matrix,
+    _propagate,
+    _predict,
+)
+
+__all__ = ["local_and_global_consistency"]
+
+
+@not_implemented_for("directed")
+def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
+    """Node classification by Local and Global Consistency
+
+    The label matrix starts at zero and is updated `max_iter` times with
+    ``F = S.dot(F) + B``, where ``S = alpha * D^(-1/2) X D^(-1/2)`` is the
+    scaled, symmetrically normalised adjacency matrix and ``B`` holds
+    ``1 - alpha`` at each (labeled node, class) position.
+
+    Parameters
+    ----------
+    G : NetworkX Graph
+        Undirected graph; some nodes carry the attribute `label_name`.
+    alpha : float
+        Clamping factor
+    max_iter : int
+        Maximum number of iterations allowed
+    label_name : string
+        Name of target labels to predict
+
+    Returns
+    -------
+    predicted : list, length = n_samples
+        Predicted label for each node.
+
+    Raises
+    ------
+    NetworkXError
+        If no node of `G` has the attribute `label_name`.
+    ImportError
+        If numpy or scipy cannot be imported.
+
+    Examples
+    --------
+    >>> from networkx.algorithms import node_classification
+    >>> G = nx.path_graph(4)
+    >>> G.nodes[0]["label"] = "A"
+    >>> G.nodes[3]["label"] = "B"
+    >>> G.nodes(data=True)
+    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
+    >>> G.edges()
+    EdgeView([(0, 1), (1, 2), (2, 3)])
+    >>> predicted = node_classification.local_and_global_consistency(G)
+    >>> predicted
+    ['A', 'A', 'B', 'B']
+
+
+    References
+    ----------
+    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
+    Learning with local and global consistency.
+    Advances in neural information processing systems, 16(16), 321-328.
+    """
+    # Each message is a single string: passing two positional strings to
+    # ImportError would make the displayed message a tuple.
+    try:
+        import numpy as np
+    except ImportError as e:
+        raise ImportError(
+            "local_and_global_consistency() requires numpy: http://numpy.org/"
+        ) from e
+    try:
+        from scipy import sparse
+    except ImportError as e:
+        raise ImportError(
+            "local_and_global_consistency() requires scipy: http://scipy.org/"
+        ) from e
+
+    def _build_propagation_matrix(X, alpha):
+        """Build propagation matrix of Local and global consistency
+
+        Parameters
+        ----------
+        X : scipy sparse matrix, shape = [n_samples, n_samples]
+            Adjacency matrix
+        alpha : float
+            Clamping factor
+
+        Returns
+        -------
+        S : scipy sparse matrix, shape = [n_samples, n_samples]
+            Propagation matrix
+
+        """
+        degrees = X.sum(axis=0).A[0]
+        degrees[degrees == 0] = 1  # Avoid division by 0
+        # Take the square root of the diagonal values before building the
+        # sparse matrix instead of applying np.sqrt to the matrix object.
+        D2 = sparse.diags(np.sqrt(1.0 / degrees), offsets=0)
+        S = alpha * D2.dot(X).dot(D2)
+        return S
+
+    def _build_base_matrix(X, labels, alpha, n_classes):
+        """Build base matrix of Local and global consistency
+
+        Parameters
+        ----------
+        X : scipy sparse matrix, shape = [n_samples, n_samples]
+            Adjacency matrix
+        labels : array, shape = [n_labeled_samples, 2]
+            Array of pairs of node id and label id
+        alpha : float
+            Clamping factor
+        n_classes : integer
+            The number of classes (distinct labels) on the input graph
+
+        Returns
+        -------
+        B : array, shape = [n_samples, n_classes]
+            Base matrix
+        """
+        n_samples = X.shape[0]
+        B = np.zeros((n_samples, n_classes))
+        B[labels[:, 0], labels[:, 1]] = 1 - alpha
+        return B
+
+    X = nx.to_scipy_sparse_matrix(G)  # adjacency matrix
+    labels, label_dict = _get_label_info(G, label_name)
+
+    if labels.shape[0] == 0:
+        raise nx.NetworkXError(
+            "No node on the input graph is labeled by '" + label_name + "'."
+        )
+
+    n_samples = X.shape[0]
+    n_classes = label_dict.shape[0]
+    F = _init_label_matrix(n_samples, n_classes)
+
+    P = _build_propagation_matrix(X, alpha)
+    B = _build_base_matrix(X, labels, alpha, n_classes)
+
+    remaining_iter = max_iter
+    while remaining_iter > 0:
+        F = _propagate(P, F, B)
+        remaining_iter -= 1
+
+    predicted = _predict(F, label_dict)
+
+    return predicted
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/init.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/init.py
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/pycache/test_harmonic_function.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/pycache/test_harmonic_function.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/pycache/test_local_and_global_consistency.cpython-36.pyc
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/pycache/test_local_and_global_consistency.cpython-36.pyc
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/test_harmonic_function.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/test_harmonic_function.py
@ -0,0 +1,76 @@
+import pytest
+
+numpy = pytest.importorskip("numpy")
+scipy = pytest.importorskip("scipy")
+
+import networkx as nx
+from networkx.algorithms import node_classification
+
+
+class TestHarmonicFunction:
+    """Tests for `node_classification.harmonic_function`."""
+
+    def test_path_graph(self):
+        # One label at each end of a 4-node path: each half takes the
+        # label of its nearest end.
+        G = nx.path_graph(4)
+        label_name = "label"
+        G.nodes[0][label_name] = "A"
+        G.nodes[3][label_name] = "B"
+        predicted = node_classification.harmonic_function(G, label_name=label_name)
+        assert predicted == ["A", "A", "B", "B"]
+
+    # In the error tests only the call under test sits inside
+    # `pytest.raises`, so an exception raised during graph setup cannot
+    # make a test pass by accident.
+
+    def test_no_labels(self):
+        G = nx.path_graph(4)
+        with pytest.raises(nx.NetworkXError):
+            node_classification.harmonic_function(G)
+
+    def test_no_nodes(self):
+        G = nx.Graph()
+        with pytest.raises(nx.NetworkXError):
+            node_classification.harmonic_function(G)
+
+    def test_no_edges(self):
+        G = nx.Graph()
+        G.add_node(1)
+        G.add_node(2)
+        with pytest.raises(nx.NetworkXError):
+            node_classification.harmonic_function(G)
+
+    def test_digraph(self):
+        G = nx.DiGraph()
+        G.add_edge(0, 1)
+        G.add_edge(1, 2)
+        G.add_edge(2, 3)
+        label_name = "label"
+        G.nodes[0][label_name] = "A"
+        G.nodes[3][label_name] = "B"
+        with pytest.raises(nx.NetworkXNotImplemented):
+            node_classification.harmonic_function(G)
+
+    def test_one_labeled_node(self):
+        # With a single class available every node must receive it.
+        G = nx.path_graph(4)
+        label_name = "label"
+        G.nodes[0][label_name] = "A"
+        predicted = node_classification.harmonic_function(G, label_name=label_name)
+        assert predicted == ["A", "A", "A", "A"]
+
+    def test_nodes_all_labeled(self):
+        G = nx.karate_club_graph()
+        label_name = "club"
+        predicted = node_classification.harmonic_function(G, label_name=label_name)
+        for i, (_, data) in enumerate(G.nodes(data=True)):
+            assert predicted[i] == data[label_name]
+
+    def test_labeled_nodes_are_not_changed(self):
+        G = nx.karate_club_graph()
+        label_name = "club"
+        label_removed = {0, 1, 2, 3, 4, 5, 6, 7}
+        for i in label_removed:
+            del G.nodes[i][label_name]
+        predicted = node_classification.harmonic_function(G, label_name=label_name)
+        # Nodes that kept their label must be predicted as that label.
+        label_not_removed = set(range(len(G))) - label_removed
+        for i in label_not_removed:
+            assert predicted[i] == G.nodes[i][label_name]
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py
@ -0,0 +1,72 @@
+import pytest
+
+numpy = pytest.importorskip("numpy")
+scipy = pytest.importorskip("scipy")
+
+
+import networkx as nx
+from networkx.algorithms import node_classification
+
+
+class TestLocalAndGlobalConsistency:
+    """Tests for `node_classification.local_and_global_consistency`."""
+
+    def test_path_graph(self):
+        # One label at each end of a 4-node path: each half takes the
+        # label of its nearest end.
+        G = nx.path_graph(4)
+        label_name = "label"
+        G.nodes[0][label_name] = "A"
+        G.nodes[3][label_name] = "B"
+        predicted = node_classification.local_and_global_consistency(
+            G, label_name=label_name
+        )
+        assert predicted == ["A", "A", "B", "B"]
+
+    # In the error tests only the call under test sits inside
+    # `pytest.raises`, so an exception raised during graph setup cannot
+    # make a test pass by accident.
+
+    def test_no_labels(self):
+        G = nx.path_graph(4)
+        with pytest.raises(nx.NetworkXError):
+            node_classification.local_and_global_consistency(G)
+
+    def test_no_nodes(self):
+        G = nx.Graph()
+        with pytest.raises(nx.NetworkXError):
+            node_classification.local_and_global_consistency(G)
+
+    def test_no_edges(self):
+        G = nx.Graph()
+        G.add_node(1)
+        G.add_node(2)
+        with pytest.raises(nx.NetworkXError):
+            node_classification.local_and_global_consistency(G)
+
+    def test_digraph(self):
+        G = nx.DiGraph()
+        G.add_edge(0, 1)
+        G.add_edge(1, 2)
+        G.add_edge(2, 3)
+        label_name = "label"
+        G.nodes[0][label_name] = "A"
+        G.nodes[3][label_name] = "B"
+        # Exercise the function under test; this previously called
+        # harmonic_function, leaving this function's directed-graph
+        # guard untested.
+        with pytest.raises(nx.NetworkXNotImplemented):
+            node_classification.local_and_global_consistency(G)
+
+    def test_one_labeled_node(self):
+        # With a single class available every node must receive it.
+        G = nx.path_graph(4)
+        label_name = "label"
+        G.nodes[0][label_name] = "A"
+        predicted = node_classification.local_and_global_consistency(
+            G, label_name=label_name
+        )
+        assert predicted == ["A", "A", "A", "A"]
+
+    def test_nodes_all_labeled(self):
+        G = nx.karate_club_graph()
+        label_name = "club"
+        # alpha=0 zeroes the propagation matrix, so the result is just the
+        # base matrix, i.e. every node's own label.
+        predicted = node_classification.local_and_global_consistency(
+            G, alpha=0, label_name=label_name
+        )
+        for i, (_, data) in enumerate(G.nodes(data=True)):
+            assert predicted[i] == data[label_name]
--- a/venv/Lib/site-packages/networkx/algorithms/node_classification/utils.py
+++ b/venv/Lib/site-packages/networkx/algorithms/node_classification/utils.py
@ -0,0 +1,99 @@
+def _propagate(P, F, B):
+    """Advance the label matrix by one propagation step.
+
+    Parameters
+    ----------
+    P : scipy sparse matrix, shape = [n_samples, n_samples]
+        Propagation matrix
+    F : numpy array, shape = [n_samples, n_classes]
+        Current label matrix
+    B : numpy array, shape = [n_samples, n_classes]
+        Base matrix added after every step
+
+    Returns
+    -------
+    F_new : array, shape = [n_samples, n_classes]
+        Updated label matrix, ``P.dot(F) + B``
+    """
+    spread = P.dot(F)
+    return spread + B
+
+
+def _get_label_info(G, label_name):
+    """Collect the labeled nodes of `G` and the distinct labels they use.
+
+    Label IDs are assigned in order of first appearance while iterating
+    over ``G.nodes(data=True)``; node IDs are positions in that iteration.
+
+    Parameters
+    ----------
+    G : Network X graph
+    label_name : string
+        Name of the node attribute that holds the label
+
+    Returns
+    -------
+    labels : numpy array, shape = [n_labeled_samples, 2]
+        Array of pairs of labeled node ID and label ID
+    label_dict : numpy array, shape = [n_classes]
+        Array of labels
+        i-th element contains the label corresponding to label ID `i`
+    """
+    import numpy as np
+
+    id_of_label = {}
+    pairs = []
+    for position, (_, attrs) in enumerate(G.nodes(data=True)):
+        if label_name not in attrs:
+            continue
+        # A label seen for the first time gets the next free ID, which is
+        # the number of labels registered so far.
+        label_id = id_of_label.setdefault(attrs[label_name], len(id_of_label))
+        pairs.append([position, label_id])
+
+    labels_by_id = sorted(id_of_label, key=id_of_label.get)
+    return (np.array(pairs), np.array(labels_by_id))
+
+
+def _init_label_matrix(n_samples, n_classes):
+    """Return an all-zero label matrix.
+
+    Parameters
+    ----------
+    n_samples : integer
+        The number of nodes (samples) on the input graph
+    n_classes : integer
+        The number of classes (distinct labels) on the input graph
+
+    Returns
+    -------
+    F : numpy array, shape = [n_samples, n_classes]
+        Label matrix filled with zeros
+    """
+    import numpy as np
+
+    shape = (n_samples, n_classes)
+    return np.zeros(shape)
+
+
+def _predict(F, label_dict):
+    """Turn a learnt label matrix into one predicted label per node.
+
+    Parameters
+    ----------
+    F : numpy array, shape = [n_samples, n_classes]
+        Learnt (resulting) label matrix
+    label_dict : numpy array, shape = [n_classes]
+        Array of labels
+        i-th element contains the label corresponding to label ID `i`
+
+    Returns
+    -------
+    predicted : list, length = n_samples
+        Label of the highest-scoring class in each row of `F`
+    """
+    import numpy as np
+
+    # argmax resolves ties in favour of the lowest label ID.
+    winning_ids = np.argmax(F, axis=1)
+    return label_dict[winning_ids].tolist()