Source code for TELF.post_processing.SeaLion.sealion

# DEPENDENCIES
import numpy as np
from pathlib import Path
import warnings
import os 

# SeaLion Tools
from .tools.get_factorization_results import copy_results
from .tools.get_correlation_matrix import  corr_matrix
from .tools.get_mixing_patterns import mixing_patterns
from .tools.get_H_clustering import H_clustering
from .tools.get_recommendations import recommendations
from .tools.get_recommendations import recommendations_masked
from .tools.get_recommendations import recommendation_graph
from .tools.get_wordclouds import words_probabilities
from .tools.get_wordclouds import word_cloud
from .tools.get_W_plot import W_plot
from .tools.get_W_plot import get_W_sub
from .tools.get_W_plot import W_UMAP
from .tools.get_X import original_X
from .tools.get_X import X_tilda

[docs] class SeaLion: def __init__( self, save_path="SeaLion/", X=None, W=None, S=None, H=None, bu=None, bi=None, global_mean=0, figsize1=None, figsize2=None, rows=None, cols=None, rows_name="Features", cols_name="Samples", num_top_words=50, edge_weight_multiplier=1, num_top_recommendations=30, recommend_probabilities=False, factorization_paths=[], UNKNOWN_MASK=None, KNOWN_MASK=None, verbose=True) -> None: # # SeaLion object variables # self.save_path = save_path self.X = X self.W = W self.S = S self.H = H if bu is None and W is not None: self.bu = np.zeros(W.shape[0]) else: self.bu = bu if bi is None and H is not None: self.bi = np.zeros(H.shape[1]) else: self.bi = bi self.global_mean = global_mean if rows is None: self.rows = np.arange(0, X.shape[0], 1).astype("int32") else: self.rows = np.array(rows) if cols is None: self.cols = np.arange(0, X.shape[1], 1).astype("int32") else: self.cols = np.array(cols) self.rows_name = rows_name self.cols_name = cols_name self.figsize1 = figsize1 self.figsize2 = figsize2 self.num_top_words = num_top_words self.num_top_recommendations = num_top_recommendations self.recommend_probabilities = recommend_probabilities self.UNKNOWN_MASK = UNKNOWN_MASK self.KNOWN_MASK = KNOWN_MASK self.factorization_paths = factorization_paths self.verbose = verbose self.edge_weight_multiplier = edge_weight_multiplier # # Results path # if not Path(self.save_path).is_dir(): Path(self.save_path).mkdir(parents=True) # # Organize topic paths # if W is not None: K = W.shape[1] elif H is not None: K = H.shape[0] else: K = 0 warnings.warn("W and H were both not passed. Could not create directories for clusters!") for k in range(K): curr_path = os.path.join(self.save_path, f'{k}') if not Path(curr_path).is_dir(): Path(curr_path).mkdir(parents=True) # # Other variables # self.words = None self.probabilities = None self.Wsub_name_idx_map = None self.W_sub = None self.Wsub_mask = None def __call__(self): if self.verbose: print("Starting general post-processing") self.get_factorization_results() self.get_W_correlation() self.get_H_correlation() self.get_S_mixing_patterns() self.get_H_clustering() self.get_words_probabilities() self.get_wordclouds() self.get_W_plot() self.get_W_UMAP() self.get_original_data() self.get_X_tilda() self.get_recommendations() self.get_masked_recommendations() self.get_recommendations_graph() if self.verbose: print("Done")
[docs] def get_masked_recommendations(self): if self.UNKNOWN_MASK is None or self.KNOWN_MASK is None: if self.verbose: print("Skipping getting unknown masked recommendations.") return recommendations_masked( UNKNOWN_MASK=self.UNKNOWN_MASK, KNOWN_MASK=self.KNOWN_MASK, W=self.W, H=self.H, S=self.S, bi=self.bi, bu=self.bu, global_mean=self.global_mean, rows=self.rows, cols=self.cols, save_path=self.save_path, num_top_recommendations=self.num_top_recommendations, recommend_probabilities=self.recommend_probabilities, cols_name=self.cols_name, rows_name=self.rows_name )
[docs] def get_recommendations_graph(self): if self.UNKNOWN_MASK is None or self.KNOWN_MASK is None: if self.verbose: print("Skipping getting recommendations graph.") return recommendation_graph( UNKNOWN_MASK=self.UNKNOWN_MASK, KNOWN_MASK=self.KNOWN_MASK, W=self.W, H=self.H, S=self.S, bi=self.bi, bu=self.bu, global_mean=self.global_mean, rows=self.rows, cols=self.cols, num_top_recommendations=self.num_top_recommendations, recommend_probabilities=self.recommend_probabilities, save_path=self.save_path, edge_weight_multiplier = self.edge_weight_multiplier )
[docs] def get_recommendations(self): recommendations( W=self.W, H=self.H, S=self.S, bi=self.bi, bu=self.bu, global_mean=self.global_mean, rows=self.rows, cols=self.cols, save_path=self.save_path, num_top_recommendations=self.num_top_recommendations, recommend_probabilities=self.recommend_probabilities )
[docs] def get_X_tilda(self): X_tilda( W=self.W, H=self.H, S=self.S, bi=self.bi, bu=self.bu, global_mean=self.global_mean, rows=self.rows, cols=self.cols, save_path=self.save_path )
[docs] def get_original_data(self): original_X(X=self.X, rows=self.rows, cols=self.cols, save_path=self.save_path)
[docs] def get_factorization_results(self, factorization_name="factorization"): if len(self.factorization_paths) > 0: copy_results( factorization_paths=self.factorization_paths, save_path=self.save_path, factorization_name=factorization_name ) else: if self.verbose: print("Skipping copying the factorization results.")
[docs] def get_W_correlation(self): if self.W is None: if self.verbose: print("Skipping getting W Factor Column Wise Correlation.") return corr_matrix(A=self.W, name="W", save_path=self.save_path)
[docs] def get_H_correlation(self): if self.H is None: if self.verbose: print("Skipping getting H Factor Row Wise Correlation.") return corr_matrix(A=self.H.T, name="H", save_path=self.save_path)
[docs] def get_S_mixing_patterns(self): if self.S is None: if self.verbose: print("Skipping getting S mixing matrix patterns") return mixing_patterns(S=self.S, save_path=self.save_path)
[docs] def get_H_clustering(self): if self.H is None: if self.verbose: print("Skipping getting the H clustering results") return H_clustering(H=self.H, S=self.S, save_path=self.save_path, cols_name=self.cols_name, cols=self.cols, figsize1=self.figsize1)
[docs] def get_words_probabilities(self): if self.W is None: if self.verbose: print("Skipping getting top words and their probabilities") return self.words, self.probabilities = words_probabilities(W=self.W, save_path=self.save_path, rows=self.rows, rows_name=self.rows_name, num_top_words=self.num_top_words)
[docs] def get_wordclouds(self): if self.W is None: if self.verbose: print("Skipping getting top words and their probabilities") return self.words, self.probabilities = words_probabilities(W=self.W, save_path=self.save_path, rows=self.rows, rows_name=self.rows_name, num_top_words=self.num_top_words) word_cloud(self.words, self.probabilities, self.save_path, max_words=self.num_top_words, background_color='white', format='png')
[docs] def get_W_plot(self): if self.W is None: if self.verbose: print("Skipping getting the W plot") return self.words, self.probabilities = words_probabilities(W=self.W, save_path=self.save_path, rows=self.rows, rows_name=self.rows_name, num_top_words=self.num_top_words) self.Wsub_name_idx_map, self.W_sub, self.Wsub_mask = get_W_sub( words=self.words, probabilities=self.probabilities, save_path=self.save_path, rows_name=self.rows_name, num_top_words=self.num_top_words) W_plot( W_sub=self.W_sub, Wsub_mask=self.Wsub_mask, Wsub_name_idx_map=self.Wsub_name_idx_map, num_top_words=self.num_top_words, rows_name=self.rows_name, save_path=self.save_path, figsize2=self.figsize2 )
[docs] def get_W_UMAP(self, args={}): if self.W is None: if self.verbose: print("Skipping getting the W UMAP plot") return self.words, self.probabilities = words_probabilities(W=self.W, save_path=self.save_path, rows=self.rows, rows_name=self.rows_name, num_top_words=self.num_top_words) self.Wsub_name_idx_map, self.W_sub, self.Wsub_mask = get_W_sub( words=self.words, probabilities=self.probabilities, save_path=self.save_path, rows_name=self.rows_name, num_top_words=self.num_top_words) W_UMAP(W_sub=self.W_sub, Wsub_name_idx_map=self.Wsub_name_idx_map, save_path=self.save_path, num_top_words=self.num_top_words, args=args)