Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/networkx/linalg/attrmatrix.py
+++ b/venv/Lib/site-packages/networkx/linalg/attrmatrix.py
@ -0,0 +1,473 @@
+"""
+    Functions for constructing matrix-like objects from graph attributes.
+"""
+
+__all__ = ["attr_matrix", "attr_sparse_matrix"]
+
+
+def _node_value(G, node_attr):
+    """Returns a function that returns a value from G.nodes[u].
+
+    We return a function expecting a node as its sole argument. Then, in the
+    simplest scenario, the returned function will return G.nodes[u][node_attr].
+    However, we also handle the case when `node_attr` is None or when it is a
+    function itself.
+
+    Parameters
+    ----------
+    G : graph
+        A NetworkX graph
+
+    node_attr : {None, str, callable}
+        Specification of how the value of the node attribute should be obtained
+        from the node attribute dictionary.
+
+    Returns
+    -------
+    value : function
+        A function expecting a node as its sole argument. The function will
+        returns a value from G.nodes[u] that depends on `edge_attr`.
+
+    """
+    if node_attr is None:
+
+        def value(u):
+            return u
+
+    elif not hasattr(node_attr, "__call__"):
+        # assume it is a key for the node attribute dictionary
+        def value(u):
+            return G.nodes[u][node_attr]
+
+    else:
+        # Advanced:  Allow users to specify something else.
+        #
+        # For example,
+        #     node_attr = lambda u: G.nodes[u].get('size', .5) * 3
+        #
+        value = node_attr
+
+    return value
+
+
+def _edge_value(G, edge_attr):
+    """Returns a function that returns a value from G[u][v].
+
+    Suppose there exists an edge between u and v.  Then we return a function
+    expecting u and v as arguments.  For Graph and DiGraph, G[u][v] is
+    the edge attribute dictionary, and the function (essentially) returns
+    G[u][v][edge_attr].  However, we also handle cases when `edge_attr` is None
+    and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v]
+    is a dictionary of all edges between u and v.  In this case, the returned
+    function sums the value of `edge_attr` for every edge between u and v.
+
+    Parameters
+    ----------
+    G : graph
+       A NetworkX graph
+
+    edge_attr : {None, str, callable}
+        Specification of how the value of the edge attribute should be obtained
+        from the edge attribute dictionary, G[u][v].  For multigraphs, G[u][v]
+        is a dictionary of all the edges between u and v.  This allows for
+        special treatment of multiedges.
+
+    Returns
+    -------
+    value : function
+        A function expecting two nodes as parameters. The nodes should
+        represent the from- and to- node of an edge. The function will
+        return a value from G[u][v] that depends on `edge_attr`.
+
+    """
+
+    if edge_attr is None:
+        # topological count of edges
+
+        if G.is_multigraph():
+
+            def value(u, v):
+                return len(G[u][v])
+
+        else:
+
+            def value(u, v):
+                return 1
+
+    elif not hasattr(edge_attr, "__call__"):
+        # assume it is a key for the edge attribute dictionary
+
+        if edge_attr == "weight":
+            # provide a default value
+            if G.is_multigraph():
+
+                def value(u, v):
+                    return sum([d.get(edge_attr, 1) for d in G[u][v].values()])
+
+            else:
+
+                def value(u, v):
+                    return G[u][v].get(edge_attr, 1)
+
+        else:
+            # otherwise, the edge attribute MUST exist for each edge
+            if G.is_multigraph():
+
+                def value(u, v):
+                    return sum([d[edge_attr] for d in G[u][v].values()])
+
+            else:
+
+                def value(u, v):
+                    return G[u][v][edge_attr]
+
+    else:
+        # Advanced:  Allow users to specify something else.
+        #
+        # Alternative default value:
+        #     edge_attr = lambda u,v: G[u][v].get('thickness', .5)
+        #
+        # Function on an attribute:
+        #     edge_attr = lambda u,v: abs(G[u][v]['weight'])
+        #
+        # Handle Multi(Di)Graphs differently:
+        #     edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()])
+        #
+        # Ignore multiple edges
+        #     edge_attr = lambda u,v: 1 if len(G[u][v]) else 0
+        #
+        value = edge_attr
+
+    return value
+
+
+def attr_matrix(
+    G,
+    edge_attr=None,
+    node_attr=None,
+    normalized=False,
+    rc_order=None,
+    dtype=None,
+    order=None,
+):
+    """Returns a NumPy matrix using attributes from G.
+
+    If only `G` is passed in, then the adjacency matrix is constructed.
+
+    Let A be a discrete set of values for the node attribute `node_attr`. Then
+    the elements of A represent the rows and columns of the constructed matrix.
+    Now, iterate through every edge e=(u,v) in `G` and consider the value
+    of the edge attribute `edge_attr`.  If ua and va are the values of the
+    node attribute `node_attr` for u and v, respectively, then the value of
+    the edge attribute is added to the matrix element at (ua, va).
+
+    Parameters
+    ----------
+    G : graph
+        The NetworkX graph used to construct the NumPy matrix.
+
+    edge_attr : str, optional
+        Each element of the matrix represents a running total of the
+        specified edge attribute for edges whose node attributes correspond
+        to the rows/cols of the matirx. The attribute must be present for
+        all edges in the graph. If no attribute is specified, then we
+        just count the number of edges whose node attributes correspond
+        to the matrix element.
+
+    node_attr : str, optional
+        Each row and column in the matrix represents a particular value
+        of the node attribute.  The attribute must be present for all nodes
+        in the graph. Note, the values of this attribute should be reliably
+        hashable. So, float values are not recommended. If no attribute is
+        specified, then the rows and columns will be the nodes of the graph.
+
+    normalized : bool, optional
+        If True, then each row is normalized by the summation of its values.
+
+    rc_order : list, optional
+        A list of the node attribute values. This list specifies the ordering
+        of rows and columns of the array. If no ordering is provided, then
+        the ordering will be random (and also, a return value).
+
+    Other Parameters
+    ----------------
+    dtype : NumPy data-type, optional
+        A valid NumPy dtype used to initialize the array. Keep in mind certain
+        dtypes can yield unexpected results if the array is to be normalized.
+        The parameter is passed to numpy.zeros(). If unspecified, the NumPy
+        default is used.
+
+    order : {'C', 'F'}, optional
+        Whether to store multidimensional data in C- or Fortran-contiguous
+        (row- or column-wise) order in memory. This parameter is passed to
+        numpy.zeros(). If unspecified, the NumPy default is used.
+
+    Returns
+    -------
+    M : NumPy matrix
+        The attribute matrix.
+
+    ordering : list
+        If `rc_order` was specified, then only the matrix is returned.
+        However, if `rc_order` was None, then the ordering used to construct
+        the matrix is returned as well.
+
+    Examples
+    --------
+    Construct an adjacency matrix:
+
+    >>> G = nx.Graph()
+    >>> G.add_edge(0, 1, thickness=1, weight=3)
+    >>> G.add_edge(0, 2, thickness=2)
+    >>> G.add_edge(1, 2, thickness=3)
+    >>> nx.attr_matrix(G, rc_order=[0, 1, 2])
+    matrix([[0., 1., 1.],
+            [1., 0., 1.],
+            [1., 1., 0.]])
+
+    Alternatively, we can obtain the matrix describing edge thickness.
+
+    >>> nx.attr_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
+    matrix([[0., 1., 2.],
+            [1., 0., 3.],
+            [2., 3., 0.]])
+
+    We can also color the nodes and ask for the probability distribution over
+    all edges (u,v) describing:
+
+        Pr(v has color Y | u has color X)
+
+    >>> G.nodes[0]["color"] = "red"
+    >>> G.nodes[1]["color"] = "red"
+    >>> G.nodes[2]["color"] = "blue"
+    >>> rc = ["red", "blue"]
+    >>> nx.attr_matrix(G, node_attr="color", normalized=True, rc_order=rc)
+    matrix([[0.33333333, 0.66666667],
+            [1.        , 0.        ]])
+
+    For example, the above tells us that for all edges (u,v):
+
+        Pr( v is red  | u is red)  = 1/3
+        Pr( v is blue | u is red)  = 2/3
+
+        Pr( v is red  | u is blue) = 1
+        Pr( v is blue | u is blue) = 0
+
+    Finally, we can obtain the total weights listed by the node colors.
+
+    >>> nx.attr_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
+    matrix([[3., 2.],
+            [2., 0.]])
+
+    Thus, the total weight over all edges (u,v) with u and v having colors:
+
+        (red, red)   is 3   # the sole contribution is from edge (0,1)
+        (red, blue)  is 2   # contributions from edges (0,2) and (1,2)
+        (blue, red)  is 2   # same as (red, blue) since graph is undirected
+        (blue, blue) is 0   # there are no edges with blue endpoints
+
+    """
+    try:
+        import numpy as np
+    except ImportError as e:
+        raise ImportError("attr_matrix() requires numpy: http://scipy.org/ ") from e
+
+    edge_value = _edge_value(G, edge_attr)
+    node_value = _node_value(G, node_attr)
+
+    if rc_order is None:
+        ordering = list({node_value(n) for n in G})
+    else:
+        ordering = rc_order
+
+    N = len(ordering)
+    undirected = not G.is_directed()
+    index = dict(zip(ordering, range(N)))
+    M = np.zeros((N, N), dtype=dtype, order=order)
+
+    seen = set()
+    for u, nbrdict in G.adjacency():
+        for v in nbrdict:
+            # Obtain the node attribute values.
+            i, j = index[node_value(u)], index[node_value(v)]
+            if v not in seen:
+                M[i, j] += edge_value(u, v)
+                if undirected:
+                    M[j, i] = M[i, j]
+
+        if undirected:
+            seen.add(u)
+
+    if normalized:
+        M /= M.sum(axis=1).reshape((N, 1))
+
+    M = np.asmatrix(M)
+
+    if rc_order is None:
+        return M, ordering
+    else:
+        return M
+
+
+def attr_sparse_matrix(
+    G, edge_attr=None, node_attr=None, normalized=False, rc_order=None, dtype=None
+):
+    """Returns a SciPy sparse matrix using attributes from G.
+
+    If only `G` is passed in, then the adjacency matrix is constructed.
+
+    Let A be a discrete set of values for the node attribute `node_attr`. Then
+    the elements of A represent the rows and columns of the constructed matrix.
+    Now, iterate through every edge e=(u,v) in `G` and consider the value
+    of the edge attribute `edge_attr`.  If ua and va are the values of the
+    node attribute `node_attr` for u and v, respectively, then the value of
+    the edge attribute is added to the matrix element at (ua, va).
+
+    Parameters
+    ----------
+    G : graph
+        The NetworkX graph used to construct the NumPy matrix.
+
+    edge_attr : str, optional
+        Each element of the matrix represents a running total of the
+        specified edge attribute for edges whose node attributes correspond
+        to the rows/cols of the matirx. The attribute must be present for
+        all edges in the graph. If no attribute is specified, then we
+        just count the number of edges whose node attributes correspond
+        to the matrix element.
+
+    node_attr : str, optional
+        Each row and column in the matrix represents a particular value
+        of the node attribute.  The attribute must be present for all nodes
+        in the graph. Note, the values of this attribute should be reliably
+        hashable. So, float values are not recommended. If no attribute is
+        specified, then the rows and columns will be the nodes of the graph.
+
+    normalized : bool, optional
+        If True, then each row is normalized by the summation of its values.
+
+    rc_order : list, optional
+        A list of the node attribute values. This list specifies the ordering
+        of rows and columns of the array. If no ordering is provided, then
+        the ordering will be random (and also, a return value).
+
+    Other Parameters
+    ----------------
+    dtype : NumPy data-type, optional
+        A valid NumPy dtype used to initialize the array. Keep in mind certain
+        dtypes can yield unexpected results if the array is to be normalized.
+        The parameter is passed to numpy.zeros(). If unspecified, the NumPy
+        default is used.
+
+    Returns
+    -------
+    M : SciPy sparse matrix
+        The attribute matrix.
+
+    ordering : list
+        If `rc_order` was specified, then only the matrix is returned.
+        However, if `rc_order` was None, then the ordering used to construct
+        the matrix is returned as well.
+
+    Examples
+    --------
+    Construct an adjacency matrix:
+
+    >>> G = nx.Graph()
+    >>> G.add_edge(0, 1, thickness=1, weight=3)
+    >>> G.add_edge(0, 2, thickness=2)
+    >>> G.add_edge(1, 2, thickness=3)
+    >>> M = nx.attr_sparse_matrix(G, rc_order=[0, 1, 2])
+    >>> M.todense()
+    matrix([[0., 1., 1.],
+            [1., 0., 1.],
+            [1., 1., 0.]])
+
+    Alternatively, we can obtain the matrix describing edge thickness.
+
+    >>> M = nx.attr_sparse_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
+    >>> M.todense()
+    matrix([[0., 1., 2.],
+            [1., 0., 3.],
+            [2., 3., 0.]])
+
+    We can also color the nodes and ask for the probability distribution over
+    all edges (u,v) describing:
+
+        Pr(v has color Y | u has color X)
+
+    >>> G.nodes[0]["color"] = "red"
+    >>> G.nodes[1]["color"] = "red"
+    >>> G.nodes[2]["color"] = "blue"
+    >>> rc = ["red", "blue"]
+    >>> M = nx.attr_sparse_matrix(G, node_attr="color", normalized=True, rc_order=rc)
+    >>> M.todense()
+    matrix([[0.33333333, 0.66666667],
+            [1.        , 0.        ]])
+
+    For example, the above tells us that for all edges (u,v):
+
+        Pr( v is red  | u is red)  = 1/3
+        Pr( v is blue | u is red)  = 2/3
+
+        Pr( v is red  | u is blue) = 1
+        Pr( v is blue | u is blue) = 0
+
+    Finally, we can obtain the total weights listed by the node colors.
+
+    >>> M = nx.attr_sparse_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
+    >>> M.todense()
+    matrix([[3., 2.],
+            [2., 0.]])
+
+    Thus, the total weight over all edges (u,v) with u and v having colors:
+
+        (red, red)   is 3   # the sole contribution is from edge (0,1)
+        (red, blue)  is 2   # contributions from edges (0,2) and (1,2)
+        (blue, red)  is 2   # same as (red, blue) since graph is undirected
+        (blue, blue) is 0   # there are no edges with blue endpoints
+
+    """
+    try:
+        import numpy as np
+        from scipy import sparse
+    except ImportError as e:
+        raise ImportError(
+            "attr_sparse_matrix() requires scipy: " "http://scipy.org/ "
+        ) from e
+
+    edge_value = _edge_value(G, edge_attr)
+    node_value = _node_value(G, node_attr)
+
+    if rc_order is None:
+        ordering = list({node_value(n) for n in G})
+    else:
+        ordering = rc_order
+
+    N = len(ordering)
+    undirected = not G.is_directed()
+    index = dict(zip(ordering, range(N)))
+    M = sparse.lil_matrix((N, N), dtype=dtype)
+
+    seen = set()
+    for u, nbrdict in G.adjacency():
+        for v in nbrdict:
+            # Obtain the node attribute values.
+            i, j = index[node_value(u)], index[node_value(v)]
+            if v not in seen:
+                M[i, j] += edge_value(u, v)
+                if undirected:
+                    M[j, i] = M[i, j]
+
+        if undirected:
+            seen.add(u)
+
+    if normalized:
+        norms = np.asarray(M.sum(axis=1)).ravel()
+        for i, norm in enumerate(norms):
+            M[i, :] /= norm
+
+    if rc_order is None:
+        return M, ordering
+    else:
+        return M