Source code for TELF.factorization.decompositions.utilities.bool_clustering

from .generic_utils import get_np

from scipy.spatial.distance import cdist
import numpy as np


def custom_bool_clustering(W_all, centroids=None, max_iters=100, distance="hamming"):
    """Align the columns of a stack of factor matrices against shared centroids.

    For every slice ``W_all[:, :, p]`` the columns are greedily matched to the
    current centroids (smallest remaining distance first) and reordered so that
    column ``r`` of the slice is the one assigned to centroid ``r``.  After all
    slices were processed the centroids are recomputed from the reordered
    stack.  The procedure stops as soon as a full pass leaves every slice
    unchanged, or after ``max_iters`` passes.

    Parameters
    ----------
    W_all : array of shape (N, K, n_perts)
        Stack of factor matrices.  It is reordered **in place**.
    centroids : array of shape (N, K), optional
        Initial centroids.  Defaults to the first slice ``W_all[:, :, 0]``.
    max_iters : int
        Maximum number of alignment passes.
    distance : str
        ``"FN"`` (false-negative rate), ``"FP"`` (false-positive rate), or any
        metric name accepted by ``scipy.spatial.distance.cdist``.

    Returns
    -------
    tuple
        ``(centroids, W_all)`` -- the final centroids and the reordered stack
        (the same array object that was passed in).
    """
    # Presumably returns numpy or an API-compatible array module matching
    # W_all (e.g. for GPU arrays) -- TODO confirm against generic_utils.get_np.
    np = get_np(W_all)
    _, K, n_perts = W_all.shape
    if centroids is None:
        centroids = W_all[:, :, 0]

    identity = list(range(K))
    # Sentinel for already-assigned rows/columns.  It must exceed every real
    # distance; a finite constant (previously 100) can be beaten by unbounded
    # metrics such as "cityblock" or "euclidean", which would let argmin pick
    # an already-used row/column and yield a non-bijective permutation.
    taken = float("inf")

    for _ in range(max_iters):
        should_break = True
        for perturbation in range(n_perts):
            # Distance step: dist[r, c] compares centroid r with column c.
            dist = _compute_distance(
                centroids, W_all[:, :, perturbation], distance=distance
            )
            # Greedy one-to-one assignment: repeatedly take the globally
            # closest (centroid, column) pair and retire both.
            permutation = list(identity)
            for _ in range(K):
                r, c = np.unravel_index(np.argmin(dist), dist.shape)
                r = int(r)
                c = int(c)
                permutation[r] = c
                dist[r, :] = taken
                dist[:, c] = taken
            W_all[:, :, perturbation] = W_all[:, permutation, perturbation]
            if permutation != identity:
                should_break = False
        # Centroid step: recompute from the freshly aligned stack.
        centroids = _compute_Bool_centroids(W_all, distance=distance)
        if should_break:
            break
    return (centroids, W_all)
def _compute_distance(W1, W2, distance="hamming"): k = W1.shape[1] dist = np.empty((k, k)) # store the distance if distance == "FN": for i in range(k): for j in range(k): dist[i, j] = np.mean(np.logical_and(W1[:, i] == 1, W2[:, j] == 0)) elif distance == "FP": for i in range(k): for j in range(k): dist[i, j] = np.mean(np.logical_and(W1[:, i] == 0, W2[:, j] == 1)) else: # use cdist dist = cdist(W1.T, W2.T, metric=distance) return dist def _compute_Bool_centroids(W_all, distance="hamming", centroidfunc=None): k = W_all.shape[1] if centroidfunc is None: if distance == "FN": centroids = np.logical_and.reduce(W_all, axis=2) elif distance == "FP": centroids = np.logical_or.reduce(W_all, axis=2) else: centroids = np.median(W_all, axis=2) else: centroids = centroidfunc(W_all, axis=2) return centroids