Source code for topologic.statistics.betweenness_centrality

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import networkx as nx
import numpy as np
import random
from .defined_histogram import DefinedHistogram
from typing import List, Optional, Union
from .make_cuts import MakeCuts, filter_function_for_make_cuts

def histogram_betweenness_centrality(
    graph: nx.Graph,
    bin_directive: Union[int, List[Union[float, int]], np.ndarray, str] = 10,
    num_random_samples: Optional[int] = None,
    normalized: bool = True,
    weight_attribute: Optional[str] = None,
    include_endpoints: bool = False,
    random_seed: Optional[Union[int, random.Random]] = None
) -> DefinedHistogram:
    """
    Build a histogram of the vertex betweenness centrality of the given graph.

    Binning is delegated to numpy's ``histogram`` function, so bin selection
    follows the ``numpy.histogram`` rules.

    The centrality computation itself is delegated to networkx'
    ``betweenness_centrality``. Supplying ``num_random_samples`` (``k`` in
    networkx nomenclature) enables networkx' randomized-sampling estimate.
    See the networkx ``betweenness_centrality`` documentation for more details.

    :param networkx.Graph graph: The graph to analyze. No changes will be made
        to it.
    :param bin_directive: Passed straight through to numpy's ``histogram``
        (and thus ``histogram_bin_edges``) function. In short: if an int is
        provided, ``bin_directive`` equal-range bins are used. If a sequence is
        provided, its values are used as the bin edges and may be sized however
        you prefer. A ``numpy.ndarray`` should have ``ndim=1`` and hold float
        or int values.
    :type bin_directive: Union[int, List[Union[float, int]], numpy.ndarray, str]
    :param Optional[int] num_random_samples: Number of vertex samples used to
        *estimate* betweenness. Should be <= ``len(graph.nodes)``. The larger
        the value, the better the approximation.
    :param bool normalized: If True the betweenness values are normalized by
        2/((n-1)(n-2)) for graphs, and 1/((n-1)(n-2)) for directed graphs,
        where n is the number of vertices in the graph.
    :param Optional[str] weight_attribute: If None, all edge weights are
        considered equal. Otherwise holds the name of the edge attribute used
        as weight.
    :param bool include_endpoints: If True include the endpoints in the
        shortest path counts.
    :param random_seed: Random seed or preconfigured random instance used for
        selecting the random samples. Only used if ``num_random_samples`` is
        set. None will generate a new random state. Specifying a random state
        will provide consistent results between runs.
    :type random_seed: Optional[Union[int, random.Random]]
    :return: A named tuple that contains the histogram and the bin_edges used
        in the histogram
    :rtype: DefinedHistogram
    """
    # Per-vertex centrality scores, keyed by vertex.
    centrality_by_vertex = nx.betweenness_centrality(
        G=graph,
        k=num_random_samples,
        normalized=normalized,
        weight=weight_attribute,
        endpoints=include_endpoints,
        seed=random_seed
    )

    # Only the scores matter for the histogram; the vertex keys are dropped.
    scores = list(centrality_by_vertex.values())
    counts, edges = np.histogram(scores, bin_directive)

    return DefinedHistogram(histogram=counts, bin_edges=edges)
def cut_vertices_by_betweenness_centrality(
    graph: nx.Graph,
    cut_threshold: Union[int, float],
    cut_process: MakeCuts,
    num_random_samples: Optional[int] = None,
    normalized: bool = True,
    weight_attribute: Optional[str] = None,
    include_endpoints: bool = False,
    random_seed: Optional[Union[int, random.Random]] = None
) -> nx.Graph:
    """
    Given a graph, a cut_threshold and a cut_process, return a copy of the
    graph from which the vertices selected by the cut have been removed.

    Which vertices are selected is decided by the filter returned from
    ``filter_function_for_make_cuts(cut_threshold, cut_process)``, applied to
    each ``(vertex, betweenness centrality)`` pair.

    The betweenness centrality calculation can take advantage of networkx'
    implementation of randomized sampling by providing ``num_random_samples``
    (or ``k``, in networkx betweenness_centrality nomenclature). See the
    networkx ``betweenness_centrality`` documentation for more details.

    :param networkx.Graph graph: The graph that will be copied and pruned. The
        original graph is not modified.
    :param cut_threshold: The threshold for making cuts based on betweenness
        centrality.
    :type cut_threshold: Union[int, float]
    :param MakeCuts cut_process: Describes how we should make the cut; cut all
        vertices larger or smaller than the cut_threshold, and whether
        exclusive or inclusive.
    :param Optional[int] num_random_samples: Use num_random_samples for vertex
        samples to *estimate* betweenness. num_random_samples should be
        <= len(graph.nodes). The larger num_random_samples is, the better the
        approximation.
    :param bool normalized: If True the betweenness values are normalized by
        2/((n-1)(n-2)) for graphs, and 1/((n-1)(n-2)) for directed graphs,
        where n is the number of vertices in the graph.
    :param Optional[str] weight_attribute: If None, all edge weights are
        considered equal. Otherwise holds the name of the edge attribute used
        as weight.
    :param bool include_endpoints: If True include the endpoints in the
        shortest path counts.
    :param random_seed: Random seed or preconfigured random instance to be
        used for randomly selecting random samples. Only used if
        num_random_samples is set. None will generate a new random state.
        Specifying a random state will provide consistent results between runs.
    :type random_seed: Optional[Union[int, random.Random]]
    :return: Pruned copy of the graph
    :rtype: networkx.Graph
    """
    graph_copy = graph.copy()

    betweenness_centrality_dict = nx.betweenness_centrality(
        G=graph,
        k=num_random_samples,
        normalized=normalized,
        weight=weight_attribute,
        endpoints=include_endpoints,
        seed=random_seed
    )

    filter_by = filter_function_for_make_cuts(cut_threshold, cut_process)

    # The filter is applied to whole (vertex, centrality) pairs; once a pair
    # is selected only the vertex is needed to prune the copy.
    vertices_to_cut = [
        vertex
        for vertex, _ in filter(filter_by, betweenness_centrality_dict.items())
    ]

    # Every selected vertex comes from `graph`, so it is present in the copy.
    graph_copy.remove_nodes_from(vertices_to_cut)

    return graph_copy