Source code for topologic.similarity

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from sklearn.metrics.cluster import adjusted_rand_score
from typing import Any, Dict
import numpy as np

__all__ = ["ari"]


[docs]def ari( primary_partition: Dict[Any, int], test_partition: Dict[Any, int], ) -> float: """ Given two partition schemas, a primary partition mapping (the most accurate representation of truth) and the test partition mapping (to be scored against that accurate representation of truth), calculate the Adjusted Rand Index. See https://en.wikipedia.org/wiki/Rand_index :param Dict[Any, int] primary_partition: The most accurate representation of truth for cluster or community membership of nodes. The keys are vertex labels and the values are the cluster/community/partition labels. :param Dict[Any, int] test_partition: The partition mapping to compare against the primary partition. The keys are vertex labels and the values are the cluster/community/partition labels. :return: The adjusted rand index for the two mappings :rtype float: :raises ValueError: If the primary partition and test partition do not have an identical vertex label set. """ if primary_partition.keys() != test_partition.keys(): raise ValueError("The reference partition provided does not contain the exact same keys as the predicted " "clusters; an ari score cannot be generated automatically.") size = len(primary_partition.keys()) primary = np.empty(size, dtype=int) test = np.empty(size, dtype=int) for i, vertex in enumerate(primary_partition.keys()): primary[i] = primary_partition[vertex] test[i] = test_partition[vertex] return adjusted_rand_score(labels_true=primary, labels_pred=test)