Source code for topologic.embedding.adjacency_spectral_embedding

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Optional

import networkx as nx

from .embedding_container import EmbeddingContainer
from .spectral_embedding import _create_augmented_adjacency_matrix, \
    _generate_embedding
from .. import assertions


[docs]def adjacency_embedding( graph: nx.Graph, maximum_dimensions: int = 100, elbow_cut: Optional[int] = 1, weight_column: str = 'weight', svd_seed: Optional[int] = None, num_iterations: int = 5, power_iteration_normalizer: str = 'QR', num_oversamples: int = 10 ) -> EmbeddingContainer: """ Generates a spectral embedding based upon the adjacency matrix of the graph. See also: https://csustan.csustan.edu/~tom/Clustering/GraphLaplacian-tutorial.pdf :param networkx.Graph graph: graph_augmented_sparse_matrix networkx Graph object containing no more than one connected component. Note that if the graph is a directed graph, the resulting dimensionality of the embedding will be twice that of an undirected graph :param int maximum_dimensions: Maximum dimensions of embeddings that will be returned - defaults to 100. Actual dimensions of resulting embeddings should be significantly smaller, but will never be over this value. :param Optional[int] elbow_cut: scree plot elbow detection will detect (usually) many elbows. This value specifies which elbow to use prior to filtering out extraneous dimensions. If None, then an embedding of size `maximum_dimensions` will be returned. :param str weight_column: The weight column to use in the Graph. :param Optional[int] svd_seed: If not provided, uses a random number every time, making consistent results difficult Set this to a random int if you want consistency between executions over the same graph. :param int num_iterations: The number of iterations to be used in the svd solver. :param int num_oversamples: Additional number of random vectors to sample the range of M so as to ensure proper conditioning. The total number of random vectors used to find the range of M is n_components + n_oversamples. Smaller number can improve speed but can negatively impact the quality of approximation of singular vectors and singular values. :param Optional[str] power_iteration_normalizer: Whether the power iterations are normalized with step-by-step QR factorization (the slowest but most accurate), 'none' (the fastest but numerically unstable when `n_iter` is large, e.g. typically 5 or larger), or 'LU' factorization (numerically stable but can lose slightly in accuracy). The 'auto' mode applies no normalization if `num_iterations` <= 2 and switches to LU otherwise. Options: 'auto' (default), 'QR', 'LU', 'none' :return: EmbeddingContainer containing a matrix, which itself contains the embedding for each node. the tuple also contains a vector containing the corresponding vertex labels for each row in the matrix. the matrix and vector are positionally correlated. :rtype: EmbeddingContainer """ assertions.assert_single_connected_components( graph, "Run this algorithm over the largest connected component (see: topologic.largest_connected_component()) or " + "run it over every connected component separately." ) working_graph = graph.copy() augmented_graph, graph_matrix, sorted_vertices = _create_augmented_adjacency_matrix(weight_column, working_graph) minimum_matrix_dimension = min(graph_matrix.shape) embedding = _generate_embedding( elbow_cut, nx.is_directed(graph), graph_matrix, maximum_dimensions, minimum_matrix_dimension, num_oversamples, num_iterations, power_iteration_normalizer, svd_seed ) result = EmbeddingContainer(embedding=embedding, vertex_labels=sorted_vertices) return result