Source code for model_package.Calibrate.joint_probability_distributions

import os
import sys
import argparse
import time
import glob
import yaml
import inspect

import seaborn
import matplotlib
import matplotlib.pyplot
import pandas
import numpy
import scipy


[docs] def plot_distributions(dfs, columns, num_domains, distribution_plots): ''' Plot normal distributions across all filtering domain studies for each parameter :param list dfs: List of Pandas DataFrames containing calibration results for each filtering domain study :param dataframe columns: Pandas DataFrame Index containing parameter names to summarize :param list num_domains: A list of integers corresponding to the number of filtering domains associated with results contained in each csv file' :param str distribution_plots: Root file name for distribution plots of each parameter :returns: Write ``{distribution_plots}_{parameter}.png`` plot file for each calibraiton parameter ''' for column in columns: parameter = str(column[2:-1]) matplotlib.pyplot.figure() for df, num_domain in zip(dfs, num_domains): mean = numpy.mean(df[column]) std = numpy.mean(df[column]) x_axis = numpy.linspace(numpy.min(df[column]), numpy.max(df[column]), 100) matplotlib.pyplot.plot(x_axis, scipy.stats.norm.pdf(x_axis, mean, std), label=f'{num_domain} domains') matplotlib.pyplot.xlabel(column) matplotlib.pyplot.legend() matplotlib.pyplot.tight_layout() matplotlib.pyplot.savefig(f'{distribution_plots}_{parameter}.png') return 0
[docs] def full_kde_plot(df, columns, full_kde): '''Plot combined KDE across all filtering domain studies for each parameter :param dataframe df: Pandas DataFrame containing results from all filtering domains :param dataframe columns: Pandas DataFrame Index containing parameter names to summarize :param str full_kde: Root file name for KDE of each parameter :returns: Write ``{full_kde}_{parameter}.png`` plot file for each calibration parameter ''' df = df.drop('domains', axis=1) for column in columns: matplotlib.pyplot.figure() data = numpy.array(df[column]) parameter = str(column[2:-1]) ax = seaborn.kdeplot(data, color='red', fill=False) xs, ys = ax.lines[-1].get_data() ax.fill_between(xs, ys, color='red', alpha=0.1) mode_idx = numpy.argmax(ys) #ax.vlines(xs[mode_idx], 0, ys[mode_idx,], ls="--", color='red') #ax.text(xs[mode_idx], -0.1, f'{xs[mode_idx]:.6f}', color='k', ha='center', transform=ax.get_xaxis_transform()) best_value = f'{xs[mode_idx]:.6f}' ax.set(xlabel=column) matplotlib.pyplot.tight_layout() matplotlib.pyplot.title(f'Best {column} = {best_value}') matplotlib.pyplot.savefig(f'{full_kde}_{parameter}.png') return 0
[docs] def joint_probability_distributions(output_file, csv_files, num_domains, num_params=None, distribution_plots=None, full_kde=None): '''Create a joint probability distribution plot to summarize calibration results :param str output_file: The output filename for the joint probability distribution plot" :param list csv_files: The csv files containing calibration results :param list num_domains: A list of integers corresponding to the number of filtering domains associated with results contained in each csv file' :param int num_params: The number of parameters to make a joint probability plot with if not all are desired' :param str distribution_plots: Optional root file name for distribution plots of each parameter :param str full_kde: Optional root file name for KDE of each parameter :returns: ``output_file`` ''' output_plot = output_file.split('.')[0] matplotlib.pyplot.figure(output_plot) matplotlib.rcParams["axes.labelsize"] = 20 seaborn.set(font_scale=1.7) headers = {'lamb': r'$\lambda$', 'mu': r'$\mu$', 'eta': r'$\eta$', 'tau': r'$\tau$', 'kappa': r'$\kappa$', 'nu': r'$\nu$', 'sigma': r'$\sigma$', 'tau1': r'$\tau_1$', 'tau2': r'$\tau_2$', 'tau3': r'$\tau_3$', 'tau4': r'$\tau_4$', 'tau5': r'$\tau_5$', 'tau6': r'$\tau_6$', 'tau7': r'$\tau_7$', 'tau8': r'$\tau_8$', 'tau9': r'$\tau_9$', 'tau10': r'$\tau_{10}$', 'tau11': r'$\tau_{11}$'} dfs = [] for csv_file, num_domain in zip(csv_files, num_domains): df = pandas.read_csv(csv_file, sep=',', index_col=0) # Reassign headers original_headers = list(df.columns) total_params = len(original_headers) header_map = {} for head in original_headers: header_map[head] = headers[head] df.rename(columns=header_map, inplace=True) # assign domain number df = df.assign(domains=[num_domain for i in range(0, len(df.index))]) dfs.append(df) # plot normal distributions columns = dfs[0].columns[0:-1] if distribution_plots: plot_distributions(dfs, columns, num_domains, distribution_plots) # Concatenate DataFrames final_df = pandas.concat(dfs[::-1]) print(final_df) # Full kde if full_kde: full_kde_plot(final_df, columns, full_kde) # Hand the number of parameters to plot if num_params == None: num_params = total_params col_slice = list(range(0, num_params)) col_slice.append(len(final_df.columns)-1) # Set hue order based on number of domains hue_order = [str(num_domain) for num_domain in num_domains[::-1]] # Create pairplot g = seaborn.PairGrid(final_df.iloc[:,col_slice], hue='domains', hue_order=hue_order, corner=True) g.map_diag(seaborn.histplot, stat='percent', common_norm=False, multiple="stack", bins=20) g.map_lower(seaborn.scatterplot) g.add_legend(title='Domains', fontsize='15') # Format labels for ax in g.axes.flatten(): if ax: ax.ticklabel_format(style='sci', scilimits=(0,0), axis='both') if len(ax.get_ylabel()) > 0: ax.set_ylabel(ax.get_ylabel(), rotation='horizontal', labelpad=20) ax.yaxis.get_label().set_horizontalalignment('right') if len(ax.get_xlabel()) > 0: ax.set_xlabel(ax.get_xlabel(), labelpad=20) matplotlib.pyplot.tight_layout() matplotlib.pyplot.savefig(output_file, dpi=300) return 0
def get_parser(): filename = inspect.getfile(lambda: None) basename = os.path.basename(filename) basename_without_extension, extension = os.path.splitext(basename) cli_description = "Create a joint probability distribution plot to summarize calibration results" parser = argparse.ArgumentParser(description=cli_description, prog=os.path.basename(filename)) parser.add_argument('-o', '--output-file', type=str, required=True, help="Specify the output filename for the joint probability distribution plot") parser.add_argument('--num-params', type=int, required=False, help='Optionally specify the number of parameters to make a joint probability plot with if not all are desired') parser.add_argument('--csv-files', nargs="+", required=True, help='The csv files containing calibration results') parser.add_argument('--num-domains', nargs="+", required=True, help="A list of integers corresponding to the number of filtering domains\ associated with results contained in each csv file") parser.add_argument('--distribution-plots', type=str, required=False, help="Optional root file name for distribution plots of each parameter") parser.add_argument('--full-kde', type=str, required=False, help="Optional root file name for KDE of each parameter") return parser if __name__ == '__main__': parser = get_parser() args, unknown = parser.parse_known_args() sys.exit(joint_probability_distributions(output_file=args.output_file, csv_files=args.csv_files, num_domains=args.num_domains, num_params=args.num_params, distribution_plots=args.distribution_plots, full_kde=args.full_kde, ))