Source code for shapash.explainer.consistency

import copy
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from category_encoders import OrdinalEncoder
from plotly import graph_objs as go
from plotly.offline import plot
from plotly.subplots import make_subplots
from sklearn.manifold import MDS

from shapash.style.style_utils import colors_loading, define_style, select_palette


class Consistency:
    """Consistency class"""

    def __init__(self):
        self._palette_name = list(colors_loading().keys())[0]
        self._style_dict = define_style(select_palette(colors_loading(), self._palette_name))

    def tuning_colorscale(self, values):
        """Adapts the color scale to the distribution of points

        Parameters
        ----------
        values: 1 column pd.DataFrame
            values whose quantiles must be calculated
        """
        desc_df = values.describe(percentiles=np.arange(0.1, 1, 0.1).tolist())
        min_pred, max_init = list(desc_df.loc[["min", "max"]].values)
        desc_pct_df = (desc_df.loc[~desc_df.index.isin(["count", "mean", "std"])] - min_pred) / (max_init - min_pred)
        color_scale = list(map(list, (zip(desc_pct_df.values.flatten(), self._style_dict["init_contrib_colorscale"]))))
        return color_scale

    def compile(self, contributions, x=None, preprocessing=None):
        """Check whether the contributions respect the correct format:
        contributions = {"method_name_1": contrib_1, "method_name_2": contrib_2, ...}
        where each contrib_i is a pandas DataFrame

        Parameters
        ----------
        contributions : dict
            Contributions provided by the user if no computation is required.
            Format must be {"method_name_1": contrib_1, "method_name_2": contrib_2, ...}
            where each contrib_i is a pandas DataFrame. By default None
        x : DataFrame, optional
            Dataset on which to compute consistency metrics, by default None
        preprocessing : category_encoders, ColumnTransformer, list, dict, optional (default: None)
            Different types of preprocessing are available:

            - A single category_encoders (OrdinalEncoder/OnehotEncoder/BaseNEncoder/BinaryEncoder/TargetEncoder)
            - A single ColumnTransformer with scikit-learn encoding or category_encoders transformers
            - A list with multiple category_encoders with optional (dict, list of dict)
            - A list with a single ColumnTransformer with optional (dict, list of dict)
            - A dict
            - A list of dict
        """
        self.x = x
        self.preprocessing = preprocessing

        if not isinstance(contributions, dict):
            raise ValueError("Contributions must be a dictionary")
        self.methods = list(contributions.keys())
        self.weights = list(contributions.values())

        self.check_consistency_contributions(self.weights)
        self.index = self.weights[0].index

    def check_consistency_contributions(self, weights):
        """
        Assert that contributions calculated from different methods are DataFrames
        of the same shape, with the same column names and index names

        Parameters
        ----------
        weights : list
            List of contributions from different methods
        """
        if weights[0].ndim == 1:
            raise ValueError("Multiple datapoints are required to compute the metric")
        if not all(isinstance(x, pd.DataFrame) for x in weights):
            raise ValueError("Contributions must be pandas DataFrames")
        if not all(x.shape == weights[0].shape for x in weights):
            raise ValueError("Contributions must be of same shape")
        if not all(x.columns.tolist() == weights[0].columns.tolist() for x in weights):
            raise ValueError("Columns names are different between contributions")
        if not all(x.index.tolist() == weights[0].index.tolist() for x in weights):
            raise ValueError("Index names are different between contributions")
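    # Usage sketch (hypothetical variable names): `compile` only validates and stores
    # the contributions, it does not compute them. Assuming `shap_df` and `lime_df`
    # are per-feature contribution DataFrames sharing the same shape, columns and index:
    #
    #     cns = Consistency()
    #     cns.compile(contributions={"shap": shap_df, "lime": lime_df}, x=X_test)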
    def consistency_plot(self, selection=None, max_features=20):
        """
        The Consistency_plot has the main objective of comparing explainability methods.

        Because explainability methods are different from each other, they may not give
        the same explanation to the same instance. Then, which method should be selected?
        Answering this question is tough. This method compares explainability methods with
        one another and evaluates how close their explanations are to each other.
        The idea behind this is pretty simple: if underlying assumptions lead to similar
        results, we would be more confident in using those methods. If not, careful
        consideration should be given to the interpretation of the explanations.

        Parameters
        ----------
        selection: list
            Contains a list of indices, a subset of the input DataFrame used to compute
            the consistency statistics, by default None
        max_features: int, optional
            Maximum number of displayed features, by default 20
        """
        # Selection
        if selection is None:
            weights = [weight.values for weight in self.weights]
        elif isinstance(selection, list):
            if len(selection) == 1:
                raise ValueError("Selection must include multiple points")
            else:
                weights = [weight.values[selection] for weight in self.weights]
        else:
            raise ValueError("Parameter selection must be a list")

        all_comparisons, mean_distances = self.calculate_all_distances(self.methods, weights)

        method_1, method_2, l2, index, backend_name_1, backend_name_2 = self.find_examples(
            mean_distances, all_comparisons, weights
        )

        self.plot_comparison(mean_distances)
        self.plot_examples(method_1, method_2, l2, index, backend_name_1, backend_name_2, max_features)
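    # Usage sketch (continuing the hypothetical example above): draws the distance map
    # between methods, then five example instances spanning the observed range of distances.
    #
    #     cns.consistency_plot(max_features=15)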
    def calculate_all_distances(self, methods, weights):
        """
        For each instance, measure a distance between contributions from different methods.
        In addition, calculate the mean distance between each pair of methods

        Parameters
        ----------
        methods : list
            List of methods used in the calculation of contributions
        weights : list
            List of contributions from different methods

        Returns
        -------
        all_comparisons : array
            Array containing, for each instance and each pair of methods,
            the distance between the contributions
        mean_distances : DataFrame
            DataFrame storing all pairwise distances between methods
        """
        mean_distances = pd.DataFrame(np.zeros((len(methods), len(methods))), columns=methods, index=methods)
        # Initialize a (n choose 2)x4 array (n=num of instances) that will contain:
        # indices of the methods that are compared, index of the instance, L2 value of the instance
        all_comparisons = np.array([np.repeat(None, 4)])

        for index_i, index_j in itertools.combinations(range(len(methods)), 2):
            l2_dist = self.calculate_pairwise_distances(weights, index_i, index_j)
            # Populate the (n choose 2)x4 array
            pairwise_comparison = np.column_stack(
                (
                    np.repeat(index_i, len(l2_dist)),
                    np.repeat(index_j, len(l2_dist)),
                    np.arange(len(l2_dist)),
                    l2_dist,
                )
            )
            all_comparisons = np.concatenate((all_comparisons, pairwise_comparison), axis=0)

            self.calculate_mean_distances(methods, mean_distances, index_i, index_j, l2_dist)

        all_comparisons = all_comparisons[1:, :]

        return all_comparisons, mean_distances

    def calculate_pairwise_distances(self, weights, index_i, index_j):
        """
        For a specific pair of methods, calculate the distance between the contributions for all instances.

        Parameters
        ----------
        weights : list
            List of contributions from 2 selected methods
        index_i : int
            Index of method 1
        index_j : int
            Index of method 2

        Returns
        -------
        l2_dist : array
            Distance between the two selected methods for all instances
        """
        # Normalize weights using L2 norm
        norm_weights_i = weights[index_i] / np.linalg.norm(weights[index_i], ord=2, axis=1)[:, np.newaxis]
        norm_weights_j = weights[index_j] / np.linalg.norm(weights[index_j], ord=2, axis=1)[:, np.newaxis]
        # And then take the L2 norm of the difference as a metric
        l2_dist = np.linalg.norm(norm_weights_i - norm_weights_j, ord=2, axis=1)

        return l2_dist

    def calculate_mean_distances(self, methods, mean_distances, index_i, index_j, l2_dist):
        """
        Given the per-instance distances between two selected methods,
        calculate the mean distance between them

        Parameters
        ----------
        methods : list
            List of methods used in the calculation of contributions
        mean_distances : DataFrame
            DataFrame storing all pairwise distances between methods
        index_i : int
            Index of method 1
        index_j : int
            Index of method 2
        l2_dist : array
            Distance between the two selected methods for all instances
        """
        # Calculate mean distance between the two methods and update the matrix
        mean_distances.loc[methods[index_i], methods[index_j]] = np.mean(l2_dist)
        mean_distances.loc[methods[index_j], methods[index_i]] = np.mean(l2_dist)
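    # Worked sketch of the metric above (illustrative numbers): each row of
    # contributions is rescaled to unit L2 norm before differencing, so the distance
    # reflects only the *shape* of the explanation, not its magnitude, and is
    # bounded by 2 (opposite unit vectors):
    #
    #     a = np.array([[3.0, 4.0]])    # normalized to [0.6, 0.8]
    #     b = np.array([[6.0, 8.0]])    # same direction -> distance 0.0
    #     c = np.array([[-3.0, -4.0]])  # opposite direction -> distance 2.0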
    def find_examples(self, mean_distances, all_comparisons, weights):
        """
        To illustrate the meaning of distances between methods, extract 5 real examples from the dataset

        Parameters
        ----------
        mean_distances : DataFrame
            DataFrame storing all pairwise distances between methods
        all_comparisons : array
            Array containing, for each instance and each pair of methods,
            the distance between the contributions
        weights : list
            List of contributions from 2 selected methods

        Returns
        -------
        method_1 : list
            Contributions of 5 instances selected to display in the second plot for method 1
        method_2 : list
            Contributions of 5 instances selected to display in the second plot for method 2
        l2 : list
            Distance between method_1 and method_2 for the 5 instances
        index : list
            Index of the selected example
        backend_name_1 : list
            Name of the explainability method displayed on the left
        backend_name_2 : list
            Name of the explainability method displayed on the right
        """
        method_1 = []
        backend_name_1 = []
        method_2 = []
        backend_name_2 = []
        index = []
        l2 = []

        # Evenly split the scale of L2 distances (from min to max excluding 0)
        for i in np.linspace(
            start=mean_distances[mean_distances > 0].min().min(), stop=mean_distances.max().max(), num=5
        ):
            # For each split, find the closest existing L2 distance
            closest_l2 = all_comparisons[:, -1][np.abs(all_comparisons[:, -1] - i).argmin()]
            # Return the row that contains this L2 distance
            row = all_comparisons[all_comparisons[:, -1] == closest_l2]
            # Extract the corresponding contribution values
            contrib_1 = weights[int(row[0, 0])][int(row[0, 2])]
            contrib_2 = weights[int(row[0, 1])][int(row[0, 2])]
            # Extract method names
            method_name_1 = self.methods[int(row[0, 0])]
            method_name_2 = self.methods[int(row[0, 1])]
            # Extract index of the selected example
            index_example = self.index[int(row[0, 2])]
            # Prevent from displaying duplicate examples
            if closest_l2 in l2:
                continue
            method_1.append(contrib_1 / np.linalg.norm(contrib_1, ord=2))
            method_2.append(contrib_2 / np.linalg.norm(contrib_2, ord=2))
            l2.append(closest_l2)
            index.append(index_example)
            backend_name_1.append(method_name_1)
            backend_name_2.append(method_name_2)

        return method_1, method_2, l2, index, backend_name_1, backend_name_2

    def calculate_coords(self, mean_distances):
        """
        Calculate 2D coordinates to position the different methods in the main graph

        Parameters
        ----------
        mean_distances : DataFrame
            DataFrame storing all pairwise distances between methods

        Returns
        -------
        Coordinates of each method
        """
        return MDS(n_components=2, dissimilarity="precomputed", random_state=0).fit_transform(mean_distances)

    def plot_comparison(self, mean_distances):
        """
        Plot the main graph displaying distances between methods

        Parameters
        ----------
        mean_distances : DataFrame
            DataFrame storing all pairwise distances between methods
        """
        font = {"color": "#{:02x}{:02x}{:02x}".format(50, 50, 50)}

        fig, ax = plt.subplots(ncols=1, figsize=(10, 6))

        ax.text(
            x=0.5, y=1.04, s="Consistency of explanations:", fontsize=24, ha="center", transform=fig.transFigure, **font
        )
        ax.text(
            x=0.5,
            y=0.98,
            s="How similar are explanations from different methods?",
            fontsize=18,
            ha="center",
            transform=fig.transFigure,
            **font,
        )
        ax.set_title("Average distances between the explanations", fontsize=14, pad=-60)

        coords = self.calculate_coords(mean_distances)

        ax.scatter(coords[:, 0], coords[:, 1], marker="o")

        for i in range(len(mean_distances.columns)):
            ax.annotate(
                mean_distances.columns[i],
                xy=coords[i, :],
                xytext=(-5, 5),
                textcoords="offset points",
                ha="right",
                va="bottom",
            )
            self.draw_arrow(
                ax,
                coords[i, :],
                coords[(i + 1) % mean_distances.shape[0], :],
                mean_distances.iloc[i, (i + 1) % mean_distances.shape[0]],
            )

        # set gray background
        ax.set_facecolor("#F5F5F2")
        # draw solid white grid lines
        ax.grid(color="w", linestyle="solid")

        lim = (coords.min().min(), coords.max().max())
        margin = 0.1 * (lim[1] - lim[0])
        lim = (lim[0] - margin, lim[1] + margin)
        ax.set(xlim=lim, ylim=lim)
        ax.set_aspect("equal", anchor="C")

        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_xticks(ax.get_yticks())

        return fig
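    # MDS sketch (illustrative numbers, not part of the class): given a precomputed
    # symmetric distance matrix, MDS places one 2D point per method so that pairwise
    # Euclidean distances approximate the input distances, which is what
    # `calculate_coords` relies on:
    #
    #     d = pd.DataFrame(
    #         [[0.0, 0.3, 0.8], [0.3, 0.0, 0.6], [0.8, 0.6, 0.0]],
    #         columns=["m1", "m2", "m3"], index=["m1", "m2", "m3"],
    #     )
    #     coords = MDS(n_components=2, dissimilarity="precomputed", random_state=0).fit_transform(d)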
    def draw_arrow(self, ax, a, b, dst):
        """
        Add an arrow in the main graph between the methods

        Parameters
        ----------
        ax : ax
            Input ax used for the plot
        a : array
            Coordinates of method 1
        b : array
            Coordinates of method 2
        dst : float
            Distance between the methods
        """
        ax.annotate(
            "",
            xy=a - 0.05 * (a - b),
            xycoords="data",
            xytext=b + 0.05 * (a - b),
            textcoords="data",
            arrowprops=dict(arrowstyle="<->"),
        )
        ax.annotate(
            "%.2f" % dst,
            xy=(0.5 * (a[0] + b[0]), 0.5 * (a[1] + b[1])),
            xycoords="data",
            textcoords="data",
            ha="center",
        )

    def plot_examples(self, method_1, method_2, l2, index, backend_name_1, backend_name_2, max_features):
        """
        Plot the second graph that explains distances via the use of real examples extracted from the dataset

        Parameters
        ----------
        method_1 : list
            Contributions of 5 instances selected to display in the second plot for method 1
        method_2 : list
            Contributions of 5 instances selected to display in the second plot for method 2
        l2 : list
            Distance between method_1 and method_2 for the 5 instances

        Returns
        -------
        figure
        """
        y = np.arange(method_1[0].shape[0])
        fig, axes = plt.subplots(ncols=len(l2), figsize=(3 * len(l2), 4))
        fig.subplots_adjust(wspace=0.3, top=0.8)
        if len(l2) == 1:
            axes = np.array([axes])
        fig.suptitle("Examples of explanations' comparisons for various distances (L2 norm)")

        for n, (i, j, k, l, m, o) in enumerate(zip(method_1, method_2, l2, index, backend_name_1, backend_name_2)):
            # Only keep top features according to both methods
            idx = np.flip(np.abs(np.concatenate([i, j])).argsort()) % len(i)
            _, first_occurrence_idx = np.unique(idx, return_index=True)
            idx, y = idx[np.sort(first_occurrence_idx)][:max_features], y[:max_features]
            i, j = i[idx], j[idx]
            # Sort by method_1 (no abs)
            idx = np.flip(i.argsort())
            i, j = i[idx], j[idx]

            axes[n].barh(y, i, label="method 1", left=0, color="#{:02x}{:02x}{:02x}".format(255, 166, 17))
            axes[n].barh(
                y,
                j,
                label="method 2",
                left=np.abs(np.max(i)) + np.abs(np.min(j)) + np.max(i) / 3,
                color="#{:02x}{:02x}{:02x}".format(117, 152, 189),
            )  # /3 to add space

            # set gray background
            axes[n].set_facecolor("#F5F5F2")
            # draw solid white grid lines
            axes[n].grid(color="w", linestyle="solid")

            axes[n].set(
                title="{}: {}".format(self.index.name if self.index.name is not None else "Id", l)
                + "\n$d_{L2}$ = "
                + str(round(k, 2))
            )
            axes[n].set_xlabel("Contributions")
            axes[n].set_ylabel(f"Top {max_features} features")
            axes[n].set_xticks([0, np.abs(np.max(i)) + np.abs(np.min(j)) + np.max(i) / 3])
            axes[n].set_xticklabels([m, o])
            axes[n].set_yticks([])

        return fig
    def pairwise_consistency_plot(
        self, methods, selection=None, max_features=10, max_points=100, file_name=None, auto_open=False
    ):
        """The Pairwise_Consistency_plot compares the difference between 2 explainability methods
        across each feature and each data point, and plots the distribution of those differences.

        This plot goes one step deeper than the consistency_plot, which compares methods on a
        global level by expressing differences in terms of a mean across the entire dataset.
        Not only do we get an understanding of how differences are distributed across the dataset,
        but we can also identify whether there are patterns based on feature values, and understand
        when one method overestimates contributions compared to the other.

        Parameters
        ----------
        methods : list
            List of explainability methods to compare
        selection: list
            Contains a list of indices, a subset of the input DataFrame used to compute
            the consistency statistics, by default None
        max_features: int, optional
            Maximum number of displayed features, by default 10
        max_points : int, optional
            Maximum number of displayed datapoints per feature, by default 100
        file_name: string, optional
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool
            open automatically the plot, by default False

        Returns
        -------
        figure
        """
        if self.x is None:
            raise ValueError("x must be defined in the compile to display the plot")
        if not isinstance(self.x, pd.DataFrame):
            raise ValueError("x must be a pandas DataFrame")
        if len(methods) != 2:
            raise ValueError("Choose 2 methods among methods of the contributions")

        # Select contributions of input methods
        pair_indices = [self.methods.index(x) for x in methods]
        pair_weights = [self.weights[i] for i in pair_indices]

        # Selection
        if selection is None:
            ind_max_points = self.x.sample(min(max_points, len(self.x))).index
            weights = [weight.iloc[ind_max_points] for weight in pair_weights]
            x = self.x.iloc[ind_max_points]
        elif isinstance(selection, list):
            if len(selection) == 1:
                raise ValueError("Selection must include multiple points")
            else:
                weights = [weight.iloc[selection] for weight in pair_weights]
                x = self.x.iloc[selection]
        else:
            raise ValueError("Parameter selection must be a list")

        # Remove constant columns
        const_cols = x.loc[:, x.apply(pd.Series.nunique) == 1]
        x = x.drop(const_cols, axis=1)
        weights = [weight.drop(const_cols, axis=1) for weight in weights]

        # Only keep features with the largest mean of absolute contributions
        mean_contributions = np.mean(np.abs(pd.concat(weights)), axis=0)
        top_features = np.flip(mean_contributions.sort_values(ascending=False)[:max_features].keys())

        fig = self.plot_pairwise_consistency(weights, x, top_features, methods, file_name, auto_open)

        return fig
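    # Usage sketch (hypothetical method names; requires `x` to have been passed to `compile`):
    #
    #     cns.pairwise_consistency_plot(methods=["shap", "lime"], max_features=10)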
    def plot_pairwise_consistency(self, weights, x, top_features, methods, file_name, auto_open):
        """Plot the main graph displaying distances between methods across each feature and data point

        Parameters
        ----------
        weights : list
            List of 2 dataframes containing contributions for the selected points
        x : DataFrame
            Original input data filtered on selected points
        top_features : array
            Top features to display ordered by mean of absolute contributions across all the selected points
        methods : list
            List of explainability methods to compare
        file_name: string
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool
            open automatically the plot

        Returns
        -------
        figure
        """
        # Look for an existing OrdinalEncoder. If none, create one for string columns
        if isinstance(self.preprocessing, OrdinalEncoder):
            encoder = self.preprocessing
        else:
            categorical_features = [col for col in x.columns if x[col].dtype == "object"]
            encoder = OrdinalEncoder(cols=categorical_features, handle_unknown="ignore", return_df=True).fit(x)
            x = encoder.transform(x)

        xaxis_title = (
            "Difference of contributions between the 2 methods"
            + f"<span style='font-size: 12px;'><br />{methods[0]} - {methods[1]}</span>"
        )
        yaxis_title = (
            "Top features<span style='font-size: 12px;'><br />(Ordered by mean of absolute contributions)</span>"
        )

        fig = make_subplots(specs=[[{"secondary_y": True}]])

        # Plot the distribution
        for i, c in enumerate(top_features):
            switch = False
            if c in encoder.cols:
                switch = True
                mapping = encoder.mapping[encoder.cols.index(c)]["mapping"]
                inverse_mapping = {v: k for k, v in mapping.to_dict().items()}
                feature_value = x[c].map(inverse_mapping)

            hv_text = [
                f"<b>Feature value</b>: {i}<br><b>{methods[0]}</b>: {j}<br><b>{methods[1]}</b>: {k}<br><b>Diff</b>: {l}"
                for i, j, k, l in zip(
                    feature_value if switch else x[c].round(3),
                    weights[0][c].round(2),
                    weights[1][c].round(2),
                    (weights[0][c] - weights[1][c]).round(2),
                )
            ]

            fig.add_trace(
                go.Violin(
                    x=(weights[0][c] - weights[1][c]).values,
                    name=c,
                    points=False,
                    fillcolor="rgba(255, 0, 0, 0.1)",
                    line={"color": "black", "width": 0.5},
                    showlegend=False,
                ),
                secondary_y=False,
            )

            fig.add_trace(
                go.Scatter(
                    x=(weights[0][c] - weights[1][c]).values,
                    y=len(x) * [i] + np.random.normal(0, 0.1, len(x)),
                    mode="markers",
                    marker={"color": x[c].values, "colorscale": self.tuning_colorscale(x[c]), "opacity": 0.7},
                    name=c,
                    text=len(x) * [c],
                    hovertext=hv_text,
                    hovertemplate="<b>%{text}</b><br><br>" + "%{hovertext}<br>" + "<extra></extra>",
                    showlegend=False,
                ),
                secondary_y=True,
            )

        # Dummy invisible plot to add the color scale
        colorbar_trace = go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                size=1,
                color=[x.min(), x.max()],
                colorscale=self.tuning_colorscale(pd.Series(np.linspace(x.min().min(), x.max().max(), 10))),
                colorbar=dict(
                    thickness=20,
                    lenmode="pixels",
                    len=400,
                    yanchor="top",
                    y=1.1,
                    ypad=20,
                    title="Feature values",
                    tickvals=[x.min().min(), x.max().max()],
                    ticktext=["Low", "High"],
                ),
                showscale=True,
            ),
            hoverinfo="none",
            showlegend=False,
        )

        fig.add_trace(colorbar_trace)

        self._update_pairwise_consistency_fig(
            fig=fig,
            top_features=top_features,
            xaxis_title=xaxis_title,
            yaxis_title=yaxis_title,
            file_name=file_name,
            auto_open=auto_open,
        )

        return fig
    def _update_pairwise_consistency_fig(self, fig, top_features, xaxis_title, yaxis_title, file_name, auto_open):
        """Function used for the pairwise_consistency_plot to update the layout of the plotly figure.

        Parameters
        ----------
        fig : figure
            Plotly figure
        top_features : array
            Top features to display ordered by mean of absolute contributions across all the selected points
        xaxis_title : str
            Title for the x-axis
        yaxis_title : str
            Title for the y-axis
        file_name: string
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool
            open automatically the plot
        """
        title = "Pairwise comparison of Consistency:"
        title += (
            "<span style='font-size: 16px;'>"
            "<br />How are differences in contributions distributed across features?</span>"
        )
        dict_t = copy.deepcopy(self._style_dict["dict_title_stability"])
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_xaxis["text"] = xaxis_title
        dict_yaxis["text"] = yaxis_title
        dict_t["text"] = title

        fig.layout.yaxis.update(showticklabels=True)
        fig.layout.yaxis2.update(showticklabels=False)
        fig.update_layout(
            template="none",
            title=dict_t,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            yaxis=dict(range=[-0.7, len(top_features) - 0.3]),
            yaxis2=dict(range=[-0.7, len(top_features) - 0.3]),
            height=max(500, 40 * len(top_features)),
        )

        fig.update_yaxes(automargin=True, zeroline=False)
        fig.update_xaxes(automargin=True)

        if file_name is not None:
            plot(fig, filename=file_name, auto_open=auto_open)
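
# Minimal end-to-end sketch (hypothetical data; not part of the shapash API): two
# synthetic "methods" whose contributions differ only by noise, which should yield
# a small average distance in the consistency plots.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(50, 4)), columns=list("abcd"))
    contrib_1 = pd.DataFrame(rng.normal(size=(50, 4)), columns=list("abcd"))
    contrib_2 = contrib_1 + rng.normal(scale=0.1, size=(50, 4))

    cns = Consistency()
    cns.compile(contributions={"method_1": contrib_1, "method_2": contrib_2}, x=X)
    cns.consistency_plot()
    plt.show()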