Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/networkx/algorithms/centrality/betweenness.py
+++ b/venv/Lib/site-packages/networkx/algorithms/centrality/betweenness.py
@ -0,0 +1,392 @@
+"""Betweenness centrality measures."""
+from heapq import heappush, heappop
+from itertools import count
+import warnings
+
+from networkx.utils import py_random_state
+from networkx.utils.decorators import not_implemented_for
+
+__all__ = ["betweenness_centrality", "edge_betweenness_centrality", "edge_betweenness"]
+
+
+@py_random_state(5)
+@not_implemented_for("multigraph")
+def betweenness_centrality(
+    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
+):
+    r"""Compute the shortest-path betweenness centrality for nodes.
+
+    Betweenness centrality of a node $v$ is the sum of the
+    fraction of all-pairs shortest paths that pass through $v$
+
+    .. math::
+
+       c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
+
+    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
+    shortest $(s, t)$-paths,  and $\sigma(s, t|v)$ is the number of
+    those paths  passing through some  node $v$ other than $s, t$.
+    If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
+    $\sigma(s, t|v) = 0$ [2]_.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph.
+
+    k : int, optional (default=None)
+      If k is not None use k node samples to estimate betweenness.
+      The value of k <= n where n is the number of nodes in the graph.
+      Higher values give better approximation.
+
+    normalized : bool, optional
+      If True the betweenness values are normalized by `2/((n-1)(n-2))`
+      for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
+      is the number of nodes in G.
+
+    weight : None or string, optional (default=None)
+      If None, all edge weights are considered equal.
+      Otherwise holds the name of the edge attribute used as weight.
+
+    endpoints : bool, optional
+      If True include the endpoints in the shortest path counts.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+        Note that this is only used if k is not None.
+
+    Returns
+    -------
+    nodes : dictionary
+       Dictionary of nodes with betweenness centrality as the value.
+
+    See Also
+    --------
+    edge_betweenness_centrality
+    load_centrality
+
+    Notes
+    -----
+    The algorithm is from Ulrik Brandes [1]_.
+    See [4]_ for the original first published version and [2]_ for details on
+    algorithms for variations and related metrics.
+
+    For approximate betweenness calculations set k=#samples to use
+    k nodes ("pivots") to estimate the betweenness values. For an estimate
+    of the number of pivots needed see [3]_.
+
+    For weighted graphs the edge weights must be greater than zero.
+    Zero edge weights can produce an infinite number of equal length
+    paths between pairs of nodes.
+
+    The total number of paths between source and target is counted
+    differently for directed and undirected graphs. Directed paths
+    are easy to count. Undirected paths are tricky: should a path
+    from "u" to "v" count as 1 undirected path or as 2 directed paths?
+
+    For betweenness_centrality we report the number of undirected
+    paths when G is undirected.
+
+    For betweenness_centrality_subset the reporting is different.
+    If the source and target subsets are the same, then we want
+    to count undirected paths. But if the source and target subsets
+    differ -- for example, if sources is {0} and targets is {1},
+    then we are only counting the paths in one direction. They are
+    undirected paths but we are counting them in a directed way.
+    To count them as undirected paths, each should count as half a path.
+
+    References
+    ----------
+    .. [1] Ulrik Brandes:
+       A Faster Algorithm for Betweenness Centrality.
+       Journal of Mathematical Sociology 25(2):163-177, 2001.
+       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
+    .. [2] Ulrik Brandes:
+       On Variants of Shortest-Path Betweenness
+       Centrality and their Generic Computation.
+       Social Networks 30(2):136-145, 2008.
+       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
+    .. [3] Ulrik Brandes and Christian Pich:
+       Centrality Estimation in Large Networks.
+       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
+       http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf
+    .. [4] Linton C. Freeman:
+       A set of measures of centrality based on betweenness.
+       Sociometry 40: 35–41, 1977
+       http://moreno.ss.uci.edu/23.pdf
+    """
+    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
+    if k is None:
+        nodes = G
+    else:
+        nodes = seed.sample(G.nodes(), k)
+    for s in nodes:
+        # single source shortest paths
+        if weight is None:  # use BFS
+            S, P, sigma = _single_source_shortest_path_basic(G, s)
+        else:  # use Dijkstra's algorithm
+            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
+        # accumulation
+        if endpoints:
+            betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s)
+        else:
+            betweenness = _accumulate_basic(betweenness, S, P, sigma, s)
+    # rescaling
+    betweenness = _rescale(
+        betweenness,
+        len(G),
+        normalized=normalized,
+        directed=G.is_directed(),
+        k=k,
+        endpoints=endpoints,
+    )
+    return betweenness
+
+
+@py_random_state(4)
+def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
+    r"""Compute betweenness centrality for edges.
+
+    Betweenness centrality of an edge $e$ is the sum of the
+    fraction of all-pairs shortest paths that pass through $e$
+
+    .. math::
+
+       c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
+
+    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
+    shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
+    those paths passing through edge $e$ [2]_.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph.
+
+    k : int, optional (default=None)
+      If k is not None use k node samples to estimate betweenness.
+      The value of k <= n where n is the number of nodes in the graph.
+      Higher values give better approximation.
+
+    normalized : bool, optional
+      If True the betweenness values are normalized by $2/(n(n-1))$
+      for graphs, and $1/(n(n-1))$ for directed graphs where $n$
+      is the number of nodes in G.
+
+    weight : None or string, optional (default=None)
+      If None, all edge weights are considered equal.
+      Otherwise holds the name of the edge attribute used as weight.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+        Note that this is only used if k is not None.
+
+    Returns
+    -------
+    edges : dictionary
+       Dictionary of edges with betweenness centrality as the value.
+
+    See Also
+    --------
+    betweenness_centrality
+    edge_load
+
+    Notes
+    -----
+    The algorithm is from Ulrik Brandes [1]_.
+
+    For weighted graphs the edge weights must be greater than zero.
+    Zero edge weights can produce an infinite number of equal length
+    paths between pairs of nodes.
+
+    References
+    ----------
+    .. [1]  A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
+       Journal of Mathematical Sociology 25(2):163-177, 2001.
+       http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
+    .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
+       Centrality and their Generic Computation.
+       Social Networks 30(2):136-145, 2008.
+       http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
+    """
+    betweenness = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
+    # b[e]=0 for e in G.edges()
+    betweenness.update(dict.fromkeys(G.edges(), 0.0))
+    if k is None:
+        nodes = G
+    else:
+        nodes = seed.sample(G.nodes(), k)
+    for s in nodes:
+        # single source shortest paths
+        if weight is None:  # use BFS
+            S, P, sigma = _single_source_shortest_path_basic(G, s)
+        else:  # use Dijkstra's algorithm
+            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
+        # accumulation
+        betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
+    # rescaling
+    for n in G:  # remove nodes to only return edges
+        del betweenness[n]
+    betweenness = _rescale_e(
+        betweenness, len(G), normalized=normalized, directed=G.is_directed()
+    )
+    return betweenness
+
+
+# obsolete name
+def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None):
+    warnings.warn(
+        "edge_betweeness is replaced by edge_betweenness_centrality", DeprecationWarning
+    )
+    return edge_betweenness_centrality(G, k, normalized, weight, seed)
+
+
+# helpers for betweenness centrality
+
+
+def _single_source_shortest_path_basic(G, s):
+    S = []
+    P = {}
+    for v in G:
+        P[v] = []
+    sigma = dict.fromkeys(G, 0.0)  # sigma[v]=0 for v in G
+    D = {}
+    sigma[s] = 1.0
+    D[s] = 0
+    Q = [s]
+    while Q:  # use BFS to find shortest paths
+        v = Q.pop(0)
+        S.append(v)
+        Dv = D[v]
+        sigmav = sigma[v]
+        for w in G[v]:
+            if w not in D:
+                Q.append(w)
+                D[w] = Dv + 1
+            if D[w] == Dv + 1:  # this is a shortest path, count paths
+                sigma[w] += sigmav
+                P[w].append(v)  # predecessors
+    return S, P, sigma
+
+
+def _single_source_dijkstra_path_basic(G, s, weight):
+    # modified from Eppstein
+    S = []
+    P = {}
+    for v in G:
+        P[v] = []
+    sigma = dict.fromkeys(G, 0.0)  # sigma[v]=0 for v in G
+    D = {}
+    sigma[s] = 1.0
+    push = heappush
+    pop = heappop
+    seen = {s: 0}
+    c = count()
+    Q = []  # use Q as heap with (distance,node id) tuples
+    push(Q, (0, next(c), s, s))
+    while Q:
+        (dist, _, pred, v) = pop(Q)
+        if v in D:
+            continue  # already searched this node.
+        sigma[v] += sigma[pred]  # count paths
+        S.append(v)
+        D[v] = dist
+        for w, edgedata in G[v].items():
+            vw_dist = dist + edgedata.get(weight, 1)
+            if w not in D and (w not in seen or vw_dist < seen[w]):
+                seen[w] = vw_dist
+                push(Q, (vw_dist, next(c), v, w))
+                sigma[w] = 0.0
+                P[w] = [v]
+            elif vw_dist == seen[w]:  # handle equal paths
+                sigma[w] += sigma[v]
+                P[w].append(v)
+    return S, P, sigma
+
+
+def _accumulate_basic(betweenness, S, P, sigma, s):
+    delta = dict.fromkeys(S, 0)
+    while S:
+        w = S.pop()
+        coeff = (1 + delta[w]) / sigma[w]
+        for v in P[w]:
+            delta[v] += sigma[v] * coeff
+        if w != s:
+            betweenness[w] += delta[w]
+    return betweenness
+
+
+def _accumulate_endpoints(betweenness, S, P, sigma, s):
+    betweenness[s] += len(S) - 1
+    delta = dict.fromkeys(S, 0)
+    while S:
+        w = S.pop()
+        coeff = (1 + delta[w]) / sigma[w]
+        for v in P[w]:
+            delta[v] += sigma[v] * coeff
+        if w != s:
+            betweenness[w] += delta[w] + 1
+    return betweenness
+
+
+def _accumulate_edges(betweenness, S, P, sigma, s):
+    delta = dict.fromkeys(S, 0)
+    while S:
+        w = S.pop()
+        coeff = (1 + delta[w]) / sigma[w]
+        for v in P[w]:
+            c = sigma[v] * coeff
+            if (v, w) not in betweenness:
+                betweenness[(w, v)] += c
+            else:
+                betweenness[(v, w)] += c
+            delta[v] += c
+        if w != s:
+            betweenness[w] += delta[w]
+    return betweenness
+
+
+def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False):
+    if normalized:
+        if endpoints:
+            if n < 2:
+                scale = None  # no normalization
+            else:
+                # Scale factor should include endpoint nodes
+                scale = 1 / (n * (n - 1))
+        elif n <= 2:
+            scale = None  # no normalization b=0 for all nodes
+        else:
+            scale = 1 / ((n - 1) * (n - 2))
+    else:  # rescale by 2 for undirected graphs
+        if not directed:
+            scale = 0.5
+        else:
+            scale = None
+    if scale is not None:
+        if k is not None:
+            scale = scale * n / k
+        for v in betweenness:
+            betweenness[v] *= scale
+    return betweenness
+
+
+def _rescale_e(betweenness, n, normalized, directed=False, k=None):
+    if normalized:
+        if n <= 1:
+            scale = None  # no normalization b=0 for all nodes
+        else:
+            scale = 1 / (n * (n - 1))
+    else:  # rescale by 2 for undirected graphs
+        if not directed:
+            scale = 0.5
+        else:
+            scale = None
+    if scale is not None:
+        if k is not None:
+            scale = scale * n / k
+        for v in betweenness:
+            betweenness[v] *= scale
+    return betweenness