# Source code for shapash.explainer.smart_plotter

"""
Smart plotter module
"""

import copy
import math
import random
import warnings
from numbers import Number

import numpy as np
import pandas as pd
import plotly.express as px
import scipy.cluster.hierarchy as sch
from plotly import graph_objs as go
from plotly.offline import plot
from plotly.subplots import make_subplots
from sklearn.cluster import KMeans

from shapash.manipulation.select_lines import select_lines
from shapash.manipulation.summarize import compute_corr, project_feature_values_1d
from shapash.style.style_utils import colors_loading, define_style, select_palette
from shapash.utils.utils import (
    add_line_break,
    add_text,
    compute_digit_number,
    compute_sorted_variables_interactions_list_indices,
    compute_top_correlations_features,
    maximum_difference_sort_value,
    truncate_str,
)
from shapash.webapp.utils.utils import round_to_k


class SmartPlotter:
    """
    SmartPlotter is a Bridge pattern decoupling plotting functions from SmartExplainer.
    The smartplotter class includes all the methods used to display graphics
    Each SmartPlotter method is easy to use from a Smart explainer object,
    just use the following syntax
    Attributes :
    explainer: object
        SmartExplainer instance to point to.
    Example
    --------
    >>> xpl.plot.my_plot_method(param=value)
    """

    def __init__(self, explainer):
        """
        Bind the plotter to an explainer and set up its default style.
        Parameters
        ----------
        explainer : object
            SmartExplainer instance to point to.
        """
        self.explainer = explainer
        # colors_loading() is called once and its result reused, both to pick
        # the default palette name (first key) and to build the style dict.
        palettes = colors_loading()
        self._palette_name = list(palettes.keys())[0]
        self._style_dict = define_style(select_palette(palettes, self._palette_name))
        # Filled in by tuning_round_digit().
        self.round_digit = None
        self.last_stability_selection = False
        self.last_compacity_selection = False

    def define_style_attributes(self, colors_dict):
        """
        Rebuild the style dictionary used by the plots from a custom palette.
        Parameters
        ----------
        colors_dict: dict
            Dict of the colors used in the different plots
        """
        custom_style = define_style(colors_dict)
        self._style_dict = custom_style

    def tuning_colorscale(self, values, keep_90_pct=False):
        """
        Adjusts the color scale based on the distribution of points.

        This function modifies the color scale used for visualization according to
        the distribution of the provided values. Optionally, it can exclude the top and bottom
        5% of values to focus on the core distribution of data.

        Parameters
        ----------
        values : pd.DataFrame
            A one-column DataFrame containing the values for which quantiles need to be calculated.
        keep_90_pct : bool, optional
            If True, the function adjusts the color scale to cover the central 90% of the data,
            excluding the lowest 5% and the highest 5%. Defaults to False.

        Returns
        -------
        tuple
            A tuple containing the adjusted color scale, the minimum value, and the maximum value
            used for the color scale adjustment.
        """
        # Extract the first column of values
        data = values.iloc[:, 0]

        # Initialize variables for min and max values
        cmin, cmax = None, None

        # Check if there is only one unique value
        if data.nunique() == 1:
            unique_value = data.iloc[0]
            cmin, cmax = unique_value, unique_value
            # Create a color scale where all values map to the unique value
            color_scale = [
                (i / (len(self._style_dict["init_contrib_colorscale"]) - 1), color)
                for i, color in enumerate(self._style_dict["init_contrib_colorscale"])
            ]
            return color_scale, cmin, cmax

        if keep_90_pct:
            # Calculate quantiles to exclude the extreme 10% of values
            lower_quantile = data.quantile(0.05)
            upper_quantile = data.quantile(0.95)
            data_tmp = data[(data >= lower_quantile) & (data <= upper_quantile)]
            if (len(data_tmp) > 200) and (data_tmp.nunique() > 1):
                data = data_tmp
            cmin, cmax = data.min(), data.max()

        # Describe the data to get basic statistics
        desc_df = data.describe(percentiles=np.arange(0.1, 1, 0.1).tolist())

        # Extract the initial min and max values
        min_pred, max_init = desc_df.loc[["min", "max"]]

        # Adjust percentile values for color scale creation
        desc_pct_df = (desc_df.loc[~desc_df.index.isin(["count", "mean", "std"])] - min_pred) / (max_init - min_pred)
        color_scale = [
            (value, color)
            for value, color in zip(desc_pct_df.values.flatten(), self._style_dict["init_contrib_colorscale"])
        ]

        return color_scale, cmin, cmax

    def tuning_round_digit(self):
        """
        Set ``self.round_digit`` from the spread of the explainer's predictions.

        The difference between the 75% and 25% quantiles of ``self.explainer.y_pred``
        is passed to ``compute_digit_number`` and the result is stored on the instance.
        """
        quartiles = [0.25, 0.75]
        summary = self.explainer.y_pred.describe(percentiles=quartiles)
        # describe() labels its percentile rows "25%" and "75%"
        labels = [f"{int(q * 100)}%" for q in quartiles]
        first_quartile, third_quartile = summary.loc[labels].values
        self.round_digit = compute_digit_number(third_quartile - first_quartile)

    def _update_contributions_fig(
        self,
        fig,
        feature_name,
        pred,
        proba_values,
        col_modality,
        col_scale,
        cmin,
        cmax,
        addnote,
        subtitle,
        width,
        height,
        file_name,
        auto_open,
    ):
        """
        Function used by both violin and scatter methods for contributions plots in order to
        update the layout of the (already) created plotly figure. The figure is modified
        in place and nothing is returned.
        Parameters
        ----------
        fig : go.Figure
            Plotly figure to be modified.
        feature_name : String
            Name of the feature, used in title
        pred: 1 column pd.DataFrame or None
            predicted values used to color plot - One Vs All in multiclass case
        proba_values: 1 column pd.DataFrame or None
            predicted proba used to color points - One Vs All in multiclass case
        col_modality: Int, Float or String
            parameter used in classification case,
            specify the modality to color in scatter plot (One Vs All)
        col_scale: list
            specify the color of points in scatter data
        cmin : float or None
            The minimum value for the color scale, providing the lower bound for color normalization.
        cmax : float or None
            The maximum value for the color scale, providing the upper bound for color normalization.
        addnote : String or None
            Specify a note to display
        subtitle : String or None
            Subtitle to display
        width : Int
            Plotly figure - layout width
        height : Int
            Plotly figure - layout height
        file_name: string or None
            Specify the save path of html files. If it is falsy, no file will be saved.
        auto_open: bool
            open automatically the plot
        """
        title = f"<b>{truncate_str(feature_name)}</b> - Feature Contribution"
        # Subtitle and note share a single smaller line under the title,
        # separated by " - " when both are given.
        extra_line = " - ".join(part for part in (subtitle, addnote) if part)
        if extra_line:
            title += "<br><sup>" + extra_line + "</sup>"
        dict_t = copy.deepcopy(self._style_dict["dict_title"])
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_t["text"] = title
        dict_xaxis["text"] = truncate_str(feature_name, 110)
        dict_yaxis["text"] = "Contribution"

        # Default to "no color data" so that an explainer case other than
        # regression / classification falls through to the plain-color branch
        # instead of raising UnboundLocalError.
        colorpoints = None
        colorbar_title = None
        if self.explainer._case == "regression":
            colorpoints = pred
            colorbar_title = "Predicted"
        elif self.explainer._case == "classification":
            colorpoints = proba_values
            colorbar_title = "Predicted Proba"

        if colorpoints is not None:
            # Only the last trace is recolored, and only if it is a scatter trace.
            if fig.data[-1].type == "scatter":
                fig.data[-1].marker.color = colorpoints.values.flatten()
                fig.data[-1].marker.coloraxis = "coloraxis"
            fig.layout.coloraxis.colorscale = col_scale
            fig.layout.coloraxis.colorbar = {"title": {"text": colorbar_title}}
            if (cmin is not None) and (cmax is not None):
                fig.layout.coloraxis.cmin = cmin
                fig.layout.coloraxis.cmax = cmax

        elif fig.data[0].type != "violin":
            if self.explainer._case == "classification" and pred is not None:
                # One Vs All coloring: one color for col_modality, another for the rest.
                fig.data[-1].marker.color = pred.iloc[:, 0].apply(
                    lambda x: (
                        self._style_dict["violin_area_classif"][1]
                        if x == col_modality
                        else self._style_dict["violin_area_classif"][0]
                    )
                )
            else:
                fig.data[-1].marker.color = self._style_dict["violin_default"]

        fig.update_traces(marker={"line": {"width": 0.8, "color": "white"}})
        for trace in fig.data:
            if trace.type != "bar":
                trace.marker["size"] = 10

        fig.update_layout(
            boxmode="group",
            template="none",
            title=dict_t,
            width=width,
            height=height,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            hovermode="closest",
        )

        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)
        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)

    def plot_scatter(
        self,
        feature_values,
        contributions,
        feature_name,
        pred=None,
        proba_values=None,
        col_modality=None,
        col_scale=None,
        cmin=None,
        cmax=None,
        metadata=None,
        addnote=None,
        subtitle=None,
        width=900,
        height=600,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        Scatter plot of one feature contribution across the prediction set.
        Parameters
        ----------
        feature_values : 1 column pd.Dataframe
            The values of one feature
        contributions : 1 column pd.Dataframe
            The contributions associate
        feature_name : String
            Name of the feature, used in title
        pred: 1 column pd.DataFrame (optional)
            predicted values used to color plot - One Vs All in multiclass case
        proba_values: 1 column pd.DataFrame (optional)
            predicted proba used to color points - One Vs All in multiclass case
        col_modality: Int, Float or String (optional)
            parameter used in classification case,
            specify the modality to color in scatter plot (One Vs All)
        col_scale: list (optional)
            specify the color of points in scatter data
        cmin : float, optional
            The minimum value for the color scale, providing the lower bound for color normalization.
        cmax : float, optional
            The maximum value for the color scale, providing the upper bound for color normalization.
        metadata : dict (optional)
            Maps a label to a sequence of values, one per point. When provided, each
            entry is shown as "label: value" in the hover box (numbers are rounded with
            round_to_k(x, 3)) in place of the feature value.
        addnote : String (default: None)
            Specify a note to display
        subtitle : String (default: None)
            Subtitle to display
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 600)
            Plotly figure - layout height
        file_name: string (optional)
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool (default=False)
            open automatically the plot
        zoom: bool (default=False)
            graph is currently zoomed
        Returns
        -------
        go.Figure
            The figure holding the density trace (when more than two points) and the scatter trace.
        """
        fig = go.Figure()

        # Sort by feature value and realign every other input on the sorted index.
        column_name = feature_values.columns[0]
        feature_values = feature_values.sort_values(by=column_name)
        contributions = contributions.loc[feature_values.index]
        if pred is not None:
            pred = pred.loc[feature_values.index]
        if proba_values is not None:
            proba_values = proba_values.loc[feature_values.index]

        # add break line to X label if necessary
        max_len_by_row = max(round(50 / self.explainer.features_desc[column_name]), 8)
        feature_values.iloc[:, 0] = feature_values.iloc[:, 0].apply(
            add_line_break,
            args=(
                max_len_by_row,
                120,
            ),
        )

        if pred is not None:
            hv_text = [f"Id: {x}<br />Predict: {y}" for x, y in zip(feature_values.index, pred.values.flatten())]
        else:
            hv_text = [f"Id: {x}" for x in feature_values.index]

        if metadata:
            metadata = {k: [round_to_k(x, 3) if isinstance(x, Number) else x for x in v] for k, v in metadata.items()}
            # Transpose to one row per point and one column per metadata key, so
            # that the template can address entries as text[i].
            # NOTE(review): metadata is not reordered by the sort above; presumably
            # the caller passes it already aligned with the sorted points - confirm.
            text_groups_features = np.swapaxes(np.array(list(metadata.values())), 0, 1)
            hovertemplate = (
                "<b>%{hovertext}</b><br />"
                + "Contribution: %{y:.4f} <br />"
                + "<br />".join(f"{key}: %{{text[{i}]}}" for i, key in enumerate(metadata))
                + "<extra></extra>"
            )
        else:
            hovertemplate = (
                "<b>%{hovertext}</b><br />"
                + f"{feature_name}: "
                + "%{customdata[0]}<br />Contribution: %{y:.4f}<extra></extra>"
            )
            text_groups_features = None

        feature_values_array = feature_values.values.flatten()

        if len(feature_values_array) > 2:
            contributions_min = contributions.values.flatten().min()
            h = contributions.values.flatten().max() - contributions_min

            if feature_values.iloc[:, 0].dtype.kind in "biufc":
                # Numeric feature: kernel density estimate of the feature values,
                # rescaled so its peak reaches a third of the contribution range.
                feature_values_min, feature_values_max = min(feature_values_array), max(feature_values_array)
                val_inter = feature_values_max - feature_values_min
                from sklearn.neighbors import KernelDensity

                feature_np = np.array(feature_values_array)
                feature_np = feature_np[~np.isnan(feature_np)][:, None]
                kde = KernelDensity(bandwidth=val_inter / 100, kernel="epanechnikov").fit(feature_np)
                xs = np.linspace(feature_values_min, feature_values_max, 1000)
                log_dens = kde.score_samples(xs[:, None])
                y_upper = np.exp(log_dens) * h / (np.max(np.exp(log_dens)) * 3) + contributions_min
                y_lower = np.full_like(y_upper, contributions_min)
            else:
                # Non-numeric feature: relative frequency of each modality.
                feature_values_counts = feature_values.value_counts()
                xs = feature_values_counts.index.get_level_values(0).sort_values()
                y_upper = (
                    feature_values_counts.loc[xs] / feature_values_counts.sum()
                ).values.flatten() / 3 + contributions_min
                y_lower = np.full_like(y_upper, contributions_min)

            # Create the density plot (closed shape: upper curve, then the baseline reversed)
            density_plot = go.Scatter(
                x=np.concatenate([pd.Series(xs), pd.Series(xs)[::-1]]),
                y=pd.concat([pd.Series(y_upper), pd.Series(y_lower)[::-1]]),
                fill="toself",
                hoverinfo="none",
                showlegend=False,
                line={"color": self._style_dict["contrib_distribution"]},
            )
            # Add density plot
            fig.add_trace(density_plot)

        fig.add_scatter(
            x=feature_values_array,
            y=contributions.values.flatten(),
            mode="markers",
            hovertext=hv_text,
            hovertemplate=hovertemplate,
            text=text_groups_features,
            showlegend=False,
        )
        # Shorten tick labels longer than 10 characters when the graph is not zoomed
        if len(feature_values_array) > 0 and isinstance(feature_values_array[0], str) and not zoom:
            feature_val = [x.replace("<br />", "") for x in feature_values_array]
            # Keep the first and last three characters. Slicing is used rather than
            # str.replace, which would replace every occurrence of the middle part.
            feature_val = [f"{x[:3]}...{x[-3:]}" if len(x) > 10 else x for x in feature_val]

            fig.update_xaxes(
                tickangle=45, ticktext=feature_val, tickvals=feature_values_array, tickmode="array", dtick=1
            )
        # Customdata contains the values and index of feature_values.
        # The values are used in the hovertext and the indexes are used for
        # the interactions between the graphics.
        customdata = np.stack((feature_values_array, feature_values.index.values), axis=-1)

        fig.update_traces(customdata=customdata, hovertemplate=hovertemplate)

        self._update_contributions_fig(
            fig=fig,
            feature_name=feature_name,
            pred=pred,
            proba_values=proba_values,
            col_modality=col_modality,
            col_scale=col_scale,
            cmin=cmin,
            cmax=cmax,
            addnote=addnote,
            subtitle=subtitle,
            width=width,
            height=height,
            file_name=file_name,
            auto_open=auto_open,
        )

        return fig

    def _update_xaxis_labels(self, fig, xs, zoom=False):
        """
        Updates the x-axis labels of a Plotly figure based on label length and zoom status.
        Shortens labels if they are longer than a specified threshold.

        Parameters:
        - fig: The Plotly figure object to update.
        - xs: A list of x-axis label strings.
        - zoom: Boolean indicating whether zoom is enabled.
        """

        # Define common x-axis parameters
        params = {"tickvals": list(range(len(xs))), "tickmode": "array", "dtick": 1, "range": [-0.6, len(xs) - 0.4]}

        nb_feature = len(xs)
        # Determine label shortening strategy based on label count and zoom status
        if isinstance(xs[0], str):
            if not zoom:
                feature_val = [x.replace("<br />", "") for x in xs]
                if nb_feature < 6:
                    k = 10
                else:
                    k = 6

                # Shorten labels that exceed the threshold
                feature_val = [
                    x.replace(x[k + k // 2 : -k + k // 2], "...") if len(x) > 2 * k else x for x in feature_val
                ]
            else:
                k = 10
                feature_val = []
                for feature_name in xs:
                    feature_name_splited = [
                        x.replace(x[k + k // 2 : -k + k // 2], "...") if len(x) > 2 * k else x
                        for x in feature_name.split("<br />")
                    ]
                    feature_val_name = "<br />".join(feature_name_splited)
                    feature_val.append(feature_val_name)

            params["ticktext"] = feature_val

            # Adjust tick angle for longer lists of labels
            if nb_feature > 5 * (zoom + 1):
                params["tickangle"] = 45
        else:
            params["ticktext"] = xs

        # Update the figure with the new x-axis parameters
        fig.update_xaxes(**params)

    def _calculate_percentage_intervals(self, data, bins=20):
        """
        Calculates the percentage of data points within each interval of a binned distribution.

        Parameters:
        - data: DataFrame containing the data to bin and calculate percentages for.
        - bins: Number of bins to use for the distribution.

        Returns:
        - A numpy array of the percentage of points in the interval corresponding to each original data point.
        """
        # Binning data into intervals and calculating the percentage of points in each interval
        intervals = pd.cut(data, bins, duplicates="drop")
        points_per_interval = intervals.value_counts()
        total_points = len(data)
        percentage_per_interval = (points_per_interval / total_points).sort_index().to_dict()

        # Mapping those percentages to the original data points
        percentage_series = intervals.map(percentage_per_interval).to_numpy()

        return percentage_series

    def _create_jittered_points(
        self, numerical_features, percentages, mean=0, std=0.6, clip_min=-1, clip_max=1, side="both"
    ):
        """
        Creates jittered points by applying a random normal perturbation scaled by calculated percentages.

        Parameters:
        - numerical_features: The numerical features to which jitter will be added.
        - percentages: The percentages to scale the jitter by.
        - mean: Mean of the normal distribution to generate jitter.
        - std: Standard deviation of the normal distribution to generate jitter.
        - clip_min: Minimum value to clip the jitter values to.
        - clip_max: Maximum value to clip the jitter values to.

        Returns:
        - A numpy array of jittered points.
        """
        # Creating jittered points
        jitter = np.random.normal(mean, std, len(percentages))
        if np.isnan(percentages).any():
            percentages.fill(1)

        if side in ["negative", "positive"]:
            jitter = np.abs(jitter)

        jitter = np.clip(jitter, clip_min, clip_max)

        if side == "negative":
            jitter *= -1

        jittered_points = numerical_features + np.clip(jitter * percentages, -0.5, 0.5)

        return jittered_points

    def prepare_hover_text(self, feature_values, pred, feature_name):
        """
        Prepares the hover text for a Plotly plot based on feature values and predictions.

        Parameters:
        - feature_values: A pandas DataFrame of feature values.
        - pred: A pandas Series of predictions, can be None.
        - feature_name: The name of the feature for which the hover text is being prepared.

        Returns:
        - A pandas DataFrame containing the hover text.
        - The hover template to be used in Plotly.
        """
        # Building the base text for hover
        hv_text = [
            f"Id: {id_val}{f'<br />Predict: {pred_val}' if pred is not None else ''}"
            for id_val, pred_val in zip(
                feature_values.index, pred.values.flatten() if pred is not None else [""] * len(feature_values)
            )
        ]

        # Creating a DataFrame for hover text
        hv_text_df = pd.DataFrame(hv_text, columns=["text"], index=feature_values.index)

        # Hover template with contribution and custom data
        hv_temp = f"{feature_name} :<br />%{{customdata[0]}}<br />Contribution: %{{y:.4f}}<extra></extra>"
        hovertemplate = f"<b>%{{hovertext}}</b><br />{hv_temp}"

        return hv_text_df, hovertemplate

    def _add_violin_trace(self, fig, name, x, y, side, line_color, hovertext, secondary_y=True):
        """
        Adds a Violin trace (no points, mean line visible, no legend) to the figure.

        `y` is expected to be a numpy array: a negligible random noise is added to it
        before plotting.
        """
        # Violin plot has a problem if for one violin all the points have the same
        # contribution value, hence the tiny noise proportional to the value range.
        noise = np.random.normal(size=y.shape)
        spread = max(y.max(), 0) - min(y.min(), 0)
        noisy_y = y + noise * spread / 10**8

        trace_options = {
            "name": name,
            "x": x,
            "y": noisy_y,
            "side": side,
            "line_color": line_color,
            "points": False,
            "showlegend": False,
            "meanline_visible": True,
            "hovertext": hovertext,
        }
        violin = go.Violin(**trace_options)

        if side:
            violin.update(side=side)

        fig.add_trace(violin, secondary_y=secondary_y)

    def _add_scatter_trace(self, fig, x, y, name, marker, hovertext, hovertemplate, customdata, secondary_y=True):
        """Adds a markers-only Scatter trace, hidden from the legend, to the figure."""
        points = go.Scatter(
            mode="markers",
            showlegend=False,
            name=name,
            x=x,
            y=y,
            marker=marker,
            hovertext=hovertext,
            hovertemplate=hovertemplate,
            customdata=customdata,
        )
        fig.add_trace(points, secondary_y=secondary_y)

    def _add_violin_and_scatter(
        self,
        fig,
        feature_cond,
        contributions,
        feature_values,
        hovertext_df,
        colorpoints,
        col_scale,
        cmin,
        cmax,
        hovertemplate,
        i,
        c,
        line_color,
        secondary_y=True,
        side="both",
    ):
        """
        Adds a Violin trace and a jittered Scatter trace for the rows selected by `feature_cond`.

        Nothing is added when the selection is empty. `i` is the x position of the
        violin and `c` the name given to both traces.
        """
        selected = contributions.loc[feature_cond]
        y = selected.iloc[:, 0].values
        if len(y) == 0:
            return

        position = [i] * len(y)
        hovertext = hovertext_df.loc[feature_cond].values.flatten()

        self._add_violin_trace(fig, c, position, y, side, line_color, hovertext, secondary_y)

        # Spread the points horizontally according to how many share the same bin
        shares = self._calculate_percentage_intervals(selected.iloc[:, 0], bins=20)
        jittered_x = self._create_jittered_points(position, shares, side=side)

        # Points are colored only when color values were supplied
        if colorpoints is None:
            marker = None
        else:
            marker = {
                "color": colorpoints.loc[feature_cond].values.flatten(),
                "colorscale": col_scale,
                "opacity": 0.7,
                "cmin": cmin,
                "cmax": cmax,
            }

        # Feature value and row index of every selected point
        customdata = np.stack(
            (feature_values.loc[feature_cond].values.flatten(), selected.index.values),
            axis=-1,
        )

        self._add_scatter_trace(fig, jittered_x, y, c, marker, hovertext, hovertemplate, customdata, secondary_y)

    def plot_violin(
        self,
        feature_values,
        contributions,
        feature_name,
        pred=None,
        proba_values=None,
        col_modality=None,
        col_scale=None,
        cmin=None,
        cmax=None,
        addnote=None,
        subtitle=None,
        width=900,
        height=600,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        Violin plot of one feature contribution across the prediction set.
        Parameters
        ----------
        feature_values : 1 column pd.Dataframe
            The values of one feature
        contributions : 1 column pd.Dataframe
            The contributions associate
        feature_name : String
            Name of the feature, used in title
        pred: 1 column pd.DataFrame (optional)
            predicted values used to color plot - One Vs All in multiclass case
        proba_values: 1 column pd.DataFrame (optional)
            predicted proba used to color points - One Vs All in multiclass case
        col_modality: Int, Float or String (optional)
            parameter used in classification case,
            specify the modality to color in scatter plot (One Vs All)
        col_scale: list (optional)
            specify the color of points in scatter data
        cmin : float, optional
            The minimum value for the color scale, providing the lower bound for color normalization.
        cmax : float, optional
            The maximum value for the color scale, providing the upper bound for color normalization.
        addnote : String (default: None)
            Specify a note to display
        subtitle : String (default: None)
            Subtitle to display
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 600)
            Plotly figure - layout height
        file_name: string (optional)
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool (default=False)
            open automatically the plot
        zoom: bool (default=False)
            graph is currently zoomed
        """
        from plotly.subplots import make_subplots

        fig = make_subplots(specs=[[{"secondary_y": True}]])

        column_name = feature_values.columns[0]
        feature_values = feature_values.sort_values(by=column_name)
        max_len_by_row = max([round(50 / self.explainer.features_desc[feature_values.columns.values[0]]), 8])
        feature_values.iloc[:, 0] = feature_values.iloc[:, 0].apply(
            add_line_break,
            args=(
                max_len_by_row,
                120,
            ),
        )
        contributions = contributions.loc[feature_values.index]
        if pred is not None:
            pred = pred.loc[feature_values.index]
        if proba_values is not None:
            proba_values = proba_values.loc[feature_values.index]

        hv_text_df, hovertemplate = self.prepare_hover_text(feature_values, pred, feature_name)

        feature_values_counts = feature_values.value_counts()
        xs = feature_values_counts.index.get_level_values(0).sort_values()

        y_upper = (feature_values_counts.loc[xs] / feature_values_counts.sum()).values.flatten()
        y_upper_max = y_upper.max()

        if self.explainer._case == "classification":
            colorpoints = proba_values
        elif self.explainer._case == "regression":
            colorpoints = pred
        else:
            colorpoints = None

        for i, c in enumerate(xs):
            # Add Density Plot
            fig.add_trace(
                go.Bar(
                    x=[i],
                    y=[y_upper[i]],
                    hoverinfo="none",
                    showlegend=False,
                    marker=dict(
                        pattern_shape="+",
                        pattern_size=6,
                        pattern_fillmode="replace",
                        pattern_bgcolor=self._style_dict["contrib_distribution"],
                        color="white",
                    ),
                )
            )

            if pred is not None and self.explainer._case == "classification":
                # Negative case
                feature_cond_neg = (pred.iloc[:, 0] != col_modality) & (feature_values.iloc[:, 0] == c)
                self._add_violin_and_scatter(
                    fig,
                    feature_cond_neg,
                    contributions,
                    feature_values,
                    hv_text_df,
                    colorpoints,
                    col_scale,
                    cmin,
                    cmax,
                    hovertemplate,
                    i,
                    c,
                    line_color=self._style_dict["violin_area_classif"][0],
                    secondary_y=True,
                    side="negative",
                )

                # Positive case
                feature_cond_pos = (pred.iloc[:, 0] == col_modality) & (feature_values.iloc[:, 0] == c)
                self._add_violin_and_scatter(
                    fig,
                    feature_cond_pos,
                    contributions,
                    feature_values,
                    hv_text_df,
                    colorpoints,
                    col_scale,
                    cmin,
                    cmax,
                    hovertemplate,
                    i,
                    c,
                    line_color=self._style_dict["violin_area_classif"][1],
                    secondary_y=True,
                    side="positive",
                )
            else:
                # General case
                feature_cond_other = feature_values.iloc[:, 0] == c
                self._add_violin_and_scatter(
                    fig,
                    feature_cond_other,
                    contributions,
                    feature_values,
                    hv_text_df,
                    colorpoints,
                    col_scale,
                    cmin,
                    cmax,
                    hovertemplate,
                    i,
                    c,
                    line_color=self._style_dict["violin_default"],
                    secondary_y=True,
                    side="both",
                )

        if colorpoints is not None:
            fig.add_trace(
                go.Scatter(
                    x=[None],
                    y=[None],
                    mode="markers",
                    showlegend=False,
                    hoverinfo="none",
                ),
                secondary_y=True,
            )

        fig.update_layout(
            violingap=0.05,
            violingroupgap=0,
            violinmode="overlay",
            xaxis_type="linear",
            barmode="overlay",
            yaxis=dict(
                side="right",
                range=[0, y_upper_max * 3],
                showticklabels=False,  # Hide tick labels
                showgrid=False,  # Hide grid lines (optional)
                visible=False,  # Make the entire axis invisible
            ),
            yaxis2=dict(
                overlaying="y",
                side="left",
            ),
        )

        # To change ticktext
        self._update_xaxis_labels(fig, xs, zoom)

        self._update_contributions_fig(
            fig=fig,
            feature_name=feature_name,
            pred=pred,
            proba_values=proba_values,
            col_modality=col_modality,
            col_scale=col_scale,
            cmin=cmin,
            cmax=cmax,
            addnote=addnote,
            subtitle=subtitle,
            width=width,
            height=height,
            file_name=file_name,
            auto_open=auto_open,
        )

        return fig

    def plot_features_import(
        self,
        feature_imp1,
        feature_imp2=None,
        title="Features Importance",
        addnote=None,
        subtitle=None,
        width=900,
        height=500,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        Plot features importance computed with the prediction set.
        Parameters
        ----------
        feature_imp1 : pd.Series
            Feature importance computed with every rows (drawn as the "Global" bars).
            Its index holds the feature labels shown on the y axis.
        feature_imp2 : pd.Series, optional (default: None)
            Second feature importance series, drawn as the "Subset" bars
            next to the global ones when provided.
        title : str
            Title of the plot, default set to 'Features Importance'
        addnote : String (default: None)
            Specify a note to display
        subtitle : String (default: None)
            Subtitle to display
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 500)
            Plotly figure - layout height
        file_name: string (optional)
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool (default=False)
            open automatically the plot
        zoom: bool (default=False)
            graph is currently zoomed; when True the y axis labels are not shortened
        Returns
        -------
        Plotly Figure Object
            Horizontal bar chart of the feature importances.
        """
        dict_t = copy.deepcopy(self._style_dict["dict_title"])
        topmargin = 80
        # Add subtitle and / or addnote below the title (joined by " - " when both are given)
        if subtitle or addnote:
            extra_text = " - ".join(text for text in (subtitle, addnote) if text)
            title += "<br><sup>" + extra_text + "</sup>"
            topmargin += 15
        dict_t.update(text=title)
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_xaxis.update(text="Mean absolute Contribution")
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_yaxis.update(text=None)
        dict_style_bar1 = self._style_dict["dict_featimp_colors"][1]
        dict_style_bar2 = self._style_dict["dict_featimp_colors"][2]

        # Change bar color for groups of features.
        # Labels may be wrapped in <b></b> tags, which are stripped before the lookup.
        marker_color = [
            (
                self._style_dict["featureimp_groups"][0]
                if (
                    self.explainer.features_groups is not None
                    and self.explainer.inv_features_dict.get(f.replace("<b>", "").replace("</b>", ""))
                    in self.explainer.features_groups.keys()
                )
                else dict_style_bar1["color"]
            )
            for f in feature_imp1.index
        ]

        layout = go.Layout(
            barmode="group",
            template="none",
            autosize=False,
            width=width,
            height=height,
            title=dict_t,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            hovermode="closest",
            margin={"l": 160, "r": 0, "t": topmargin, "b": 50},
        )
        # To change ticktext when the label size is upper than 30 and zoom is False.
        # The emptiness check avoids an IndexError on an empty importance series.
        if not zoom and len(feature_imp1.index) > 0 and isinstance(feature_imp1.index[0], str):
            # Shorten long labels to "<first 24 chars>...<last 3 chars>".
            # Slicing is used instead of str.replace, which would also rewrite
            # any other occurrence of the removed middle part inside the label.
            index_val = [y[:24] + "..." + y[-3:] if len(y) > 30 else y for y in feature_imp1.index]
        else:
            index_val = feature_imp1.index
        bar1 = go.Bar(
            x=feature_imp1.round(4),
            y=feature_imp1.index,
            orientation="h",
            name="Global",
            marker=dict_style_bar1,
            marker_color=marker_color,
            hovertemplate="Feature: %{customdata}<br />Contribution: %{x:.4f}<extra></extra>",
            customdata=feature_imp1.index,
        )
        if feature_imp2 is not None:
            bar2 = go.Bar(
                x=feature_imp2.round(4),
                y=feature_imp2.index,
                orientation="h",
                name="Subset",
                marker=dict_style_bar2,
                hovertemplate="Feature: %{customdata}<br />Contribution: %{x:.4f}<extra></extra>",
                customdata=feature_imp2.index,
            )
            data = [bar2, bar1]
        else:
            data = bar1

        fig = go.Figure(data=data, layout=layout)
        # Update ticktext: full labels stay as tick values, shortened ones are displayed
        fig.update_yaxes(ticktext=index_val, tickvals=feature_imp1.index, tickmode="array", dtick=1)
        fig.update_yaxes(automargin=True)
        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)
        return fig

    def plot_bar_chart(
        self,
        index_value,
        var_dict,
        x_val,
        contrib,
        yaxis_max_label=12,
        subtitle=None,
        width=900,
        height=550,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        Plotly bar plot of local explainers
        Parameters
        ----------
        index_value:
            the index of row, used in title of local contribution plot.
            When empty, a placeholder figure with a help message is returned.
        var_dict: numpy array
            Unidimensional numpy array containing the features names for the observation of interest.
        x_val: numpy array
            Unidimensional numpy array containing the features values for the observation of interest.
            An empty string marks a bar that has no feature value to display
            (its label is rendered in italics).
        contrib: numpy array
            Unidimensional numpy array containing the contribution value for the observation of interest.
        yaxis_max_label: int (default: 12)
            Maximum number of variables to display labels on the y axis
        subtitle: string (default: None)
            subtitle to display
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 550)
            Plotly figure - layout height
        file_name: string (optional)
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool (default=False)
            open automatically the plot
        zoom: bool (default=False)
            graph is currently zoomed; when True the y axis labels are not shortened
        Returns
        -------
        plotly bar plot
            A bar plot with selected contributions and
            associated feature values for one observation.
        """
        if len(index_value) == 0:
            # No observation selected: return an empty figure carrying a help message
            fig = go.Figure()
            fig.update_layout(
                xaxis={"visible": False},
                yaxis={"visible": False},
                annotations=[
                    {
                        "text": "Select a valid single sample to display<br />Local Explanation plot.",
                        "xref": "paper",
                        "yref": "paper",
                        "showarrow": False,
                        "font": {"size": 14},
                    }
                ],
            )
            return fig

        dict_t = copy.deepcopy(self._style_dict["dict_title"])
        topmargin = 80
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_local_plot_colors = copy.deepcopy(self._style_dict["dict_local_plot_colors"])
        title = f"Local Explanation - Id: <b>{index_value[0]}</b>"
        # Add subtitle
        if subtitle:
            title += "<br><sup>" + subtitle + "</sup>"
            topmargin += 15
        dict_t["text"] = title
        dict_xaxis["text"] = "Contribution"
        dict_yaxis["text"] = None

        layout = go.Layout(
            barmode="group",
            template="none",
            width=width,
            height=height,
            title=dict_t,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            yaxis_type="category",
            hovermode="closest",
            margin={"l": 150, "r": 20, "t": topmargin, "b": 70},
        )
        bars = []
        for num, (feature, value, contrib_value) in enumerate(zip(var_dict, x_val, contrib)):
            group_name = None
            if value == "":
                # Bar without feature value: only the name is displayed
                ylabel = f"<i>{feature}</i>"
                hoverlabel = f"<b>{feature}</b>"
            else:
                # Is this bar a group of features ? (computed once, used for hover text, label and color)
                is_group = (
                    self.explainer.features_groups is not None
                    and self.explainer.inv_features_dict.get(feature) in self.explainer.features_groups.keys()
                )
                if is_group:
                    # If bar is a group of features, hovertext includes the values of the features of the group
                    # And color changes
                    group_name = self.explainer.inv_features_dict.get(feature)
                    feat_groups_values = self.explainer.x_init[self.explainer.features_groups[group_name]].loc[
                        index_value[0]
                    ]
                    hoverlabel = "<br />".join(
                        [
                            "<b>{} :</b>{}".format(
                                add_line_break(self.explainer.features_dict.get(f_name, f_name), 40, maxlen=120),
                                add_line_break(f_value, 40, maxlen=160),
                            )
                            for f_name, f_value in feat_groups_values.to_dict().items()
                        ]
                    )
                else:
                    hoverlabel = "<b>{} :</b><br />{}".format(
                        add_line_break(feature, 40, maxlen=120), add_line_break(value, 40, maxlen=160)
                    )
                trunc_value = truncate_str(feature, 45)
                if not zoom and len(trunc_value) > 30:
                    # Shorten to "<first 24 chars>...<last 3 chars>".
                    # Slicing is used instead of str.replace, which would also rewrite
                    # any other occurrence of the removed middle part inside the label.
                    trunc_new_value = trunc_value[:24] + "..." + trunc_value[-3:]
                else:
                    trunc_new_value = trunc_value
                # We don't want to display label values for t-sne projected values of groups of features.
                if len(contrib) <= yaxis_max_label and not is_group:
                    # ylabel is based on trunc_new_value
                    ylabel = "<b>{} :</b><br />{}".format(trunc_new_value, truncate_str(value, 45))
                else:
                    ylabel = f"<b>{trunc_new_value}</b>"

            # colors: 1 / -1 for regular bars, 0 / -2 for bars without feature value,
            # the sign following the sign of the contribution
            if contrib_value >= 0:
                color = 1 if value != "" else 0
            else:
                color = -1 if value != "" else -2

            # If the bar is a group of features we modify the color
            if group_name is not None:
                bar_color = (
                    self._style_dict["featureimp_groups"][0]
                    if color == 1
                    else self._style_dict["featureimp_groups"][1]
                )
            else:
                bar_color = dict_local_plot_colors[color]["color"]

            barobj = go.Bar(
                x=[contrib_value],
                y=[ylabel],
                customdata=[hoverlabel],
                orientation="h",
                marker=dict_local_plot_colors[color],
                marker_color=bar_color,
                showlegend=False,
                hovertemplate="%{customdata}<br />Contribution: %{x:.4f}<extra></extra>",
            )

            bars.append([color, contrib_value, num, barobj])

        # Order bars by color code, then contribution; `num` is unique so the
        # comparison never reaches the (non comparable) bar object.
        bars.sort()
        fig = go.Figure(data=[x[-1] for x in bars], layout=layout)
        fig.update_yaxes(dtick=1)
        fig.update_yaxes(automargin=True)

        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)
        return fig

    def get_selection(self, line, var_dict, x_val, contrib):
        """
        Extract, from the three sorted dataframes, the row of the observation of interest.
        Parameters
        ----------
        line: list
            A one element list containing the index of the observation of interest.
        var_dict: pandas.DataFrame
            A dataframe that indicates for each observation (each row)
            the index of the sorted contribution
            (sorted by descending order, in absolute values).
        x_val: pandas.DataFrame
            A dataframe with sorted features for each observation.
        contrib: pandas.DataFrame
            A dataframe with sorted contributions for each observation.
        Returns
        -------
        numpy arrays
            Unidimensional numpy arrays (var_dict, x_val, contrib) holding
            the values of the selected observation.
        """
        # Same row lookup applied to each frame; the result is handed back in signature order
        contrib_row, x_row, var_row = (frame.loc[line[0], :].values for frame in (contrib, x_val, var_dict))
        return var_row, x_row, contrib_row

    def apply_mask_one_line(self, line, var_dict, x_val, contrib, label=None):
        """
        An auxiliary function to select the mask to apply before plotting local
        explanation.
        Parameters
        ----------
        line: list
            A one element list containing the index of the observation of interest.
        var_dict: numpy array
            Unidimensional numpy array containing the values for the observation of interest.
        x_val: numpy array
            Unidimensional numpy array containing the values for the observation of interest.
        contrib: numpy array
            Unidimensional numpy array containing the values for the observation of interest.
        label: integer (default None)
            specify the pd.DataFrame of the mask list (classification case) to apply.
            Only used when the explainer mask is a list.
        Returns
        -------
        lists
            Masked input lists (var_dict, x_val, contrib).
        """
        if hasattr(self.explainer, "mask"):
            if isinstance(self.explainer.mask, list):
                mask = self.explainer.mask[label].loc[line[0], :].values
            else:
                mask = self.explainer.mask.loc[line[0], :].values
        else:
            # No mask computed: keep everything. The dtype is forced to bool because
            # np.array([True] * 0) is a float array, which is not a valid index
            # when the inputs are empty.
            mask = np.ones(len(contrib), dtype=bool)

        contrib = contrib[mask]
        x_val = x_val[mask]
        var_dict = var_dict[mask]

        return var_dict.tolist(), x_val.tolist(), contrib.tolist()

    def check_masked_contributions(self, line, var_dict, x_val, contrib, label=None):
        """
        Append the sums of hidden (masked) contributions to the values of one observation.
        When the explainer has a masked_contributions attribute, the entries
        "Hidden Negative Contributions" and "Hidden Positive Contributions" are
        added, with an empty feature value, unless their contribution equals 0.
        Parameters
        ----------
        line: list
            A one element list containing the index of the observation of interest.
        var_dict: list
            Features names for the observation of interest.
        x_val: list
            Features values for the observation of interest.
        contrib: list
            Contributions for the observation of interest.
        label: integer (default None)
            Position of the dataframe to use when masked_contributions is a list.
        Returns
        -------
        lists
            The input lists (var_dict, x_val, contrib), extended in place
            with the hidden contributions.
        """
        # Nothing to add when no masked contributions were computed
        if not hasattr(self.explainer, "masked_contributions"):
            return var_dict, x_val, contrib

        masked = self.explainer.masked_contributions
        source = masked[label] if isinstance(masked, list) else masked
        hidden_contrib = source.loc[line[0], :].values.tolist()
        hidden_names = ["Hidden Negative Contributions", "Hidden Positive Contributions"]
        hidden_values = ["", ""]

        # Drop null hidden contributions, from the highest position down
        # so that the remaining positions stay valid while deleting
        null_positions = np.where(np.array(hidden_contrib) == 0)[0].tolist()
        for position in sorted(null_positions, reverse=True):
            del hidden_names[position]
            del hidden_values[position]
            del hidden_contrib[position]

        var_dict.extend(hidden_names)
        x_val.extend(hidden_values)
        contrib.extend(hidden_contrib)

        return var_dict, x_val, contrib

    def local_pred(self, index, label=None):
        """
        Return the prediction of one observation, to be displayed in local_plot.
        Classification: the value read in proba_values for the requested label
        column, or None when no proba_values are available.
        Regression: the value read in y_pred, or the output of model.predict
        on the encoded row when y_pred is not available.
        Parameters
        ----------
        index: string, int, float, ...
            specify the row we want to pred
        label: int (default: None)
            Column position of the label in proba_values (classification case).
        Returns
        -------
        float: Predict or predict_proba value
        """
        xpl = self.explainer
        if xpl._case == "classification":
            proba = xpl.proba_values
            value = None if proba is None else proba.iloc[:, [label]].loc[index].values[0]
        elif xpl._case == "regression":
            predictions = xpl.y_pred
            if predictions is None:
                # No stored prediction: ask the model for this single row
                value = xpl.model.predict(xpl.x_encoded.loc[[index]])[0]
            else:
                value = predictions.loc[index]

        # A row selection may come back as a Series: unwrap its first element
        return value.values[0] if isinstance(value, pd.Series) else value

[docs]    def local_plot(
        self,
        index=None,
        row_num=None,
        query=None,
        label=None,
        show_masked=True,
        show_predict=True,
        display_groups=None,
        yaxis_max_label=12,
        width=900,
        height=550,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        The local_plot method is used to display the local contributions of
        an individual in the dataset.
        The plot returned is a summary of local explainability.
        you could use the method filter beforehand to modify the parameters of this summary.
        preprocessing is used here to make this graph more intelligible
        index, row_num or query parameter can be used to select the local explanations to display
        local_plot tutorial offers a lot of examples (please check tutorial part of this doc)
        Parameters
        ----------
        index: string, int, float, ... type of index in x_val input matrix (default None)
            1rst option, to select a row whose local contribution will be displayed.
            Use this parameter to select a row by index
        row_num: int (default None)
            2nd option, specify the row number to select the row whose local
            contribution will be displayed.
        query: string
            3rd option: Boolean condition that must filter only one line of the prediction
            set before plotting.
        label: integer or string (default None)
            If the label is of string type, check if it can be changed to integer to select the
            good dataframe object.
        show_masked: bool (default: False)
            show the sum of the contributions of the hidden variable
        show_predict: bool (default: True)
            show predict or predict proba value
        yaxis_max_label: int
            Maximum number of variables to display labels on the y axis
        display_groups : bool (default: None)
            Whether or not to display groups of features. This option is
            only useful if groups of features are declared when compiling
            SmartExplainer object.
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 550)
            Plotly figure - layout height
        file_name: string (optional)
            File name to use to save the plotly bar chart. If None the bar chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the bar plot or not.
        zoom: bool (default=False)
            graph is currently zoomed
        Returns
        -------
        Plotly Figure Object
            Input arrays updated with masked contributions.
        Example
        --------
        >>> xpl.plot.local_plot(row_num=0)
        """
        display_groups = True if (display_groups is not False and self.explainer.features_groups is not None) else False
        if display_groups:
            data = self.explainer.data_groups
        else:
            data = self.explainer.data

        if index is not None:
            if index in self.explainer.x_init.index:
                line = [index]
            else:
                line = []
        elif row_num is not None:
            line = [self.explainer.x_init.index[row_num]]
        elif query is not None:
            line = select_lines(self.explainer.x_init, query)
        else:
            line = []

        subtitle = ""

        if len(line) != 1:
            if len(line) > 1:
                raise ValueError("Only one line/observation must match the condition")
            contrib = []
            x_val = []
            var_dict = []

        else:
            # apply filter if the method have not yet been asked in order to limit the number of feature to display
            if (
                not hasattr(self.explainer, "mask_params")  # If the filter method has not been called yet
                # Or if the already computed mask was not updated with current display_groups parameter
                or (
                    isinstance(data["contrib_sorted"], pd.DataFrame)
                    and len(data["contrib_sorted"].columns) != len(self.explainer.mask.columns)
                )
                or (
                    isinstance(data["contrib_sorted"], list)
                    and len(data["contrib_sorted"][0].columns) != len(self.explainer.mask[0].columns)
                )
            ):
                self.explainer.filter(max_contrib=20, display_groups=display_groups)

            if self.explainer._case == "classification":
                if label is None:
                    label = -1

                label_num, _, label_value = self.explainer.check_label_name(label)

                contrib = data["contrib_sorted"][label_num]
                x_val = data["x_sorted"][label_num]
                var_dict = data["var_dict"][label_num]

                if show_predict is True:
                    pred = self.local_pred(line[0], label_num)
                    if pred is None:
                        subtitle = f"Response: <b>{label_value}</b> - No proba available"
                    else:
                        subtitle = f"Response: <b>{label_value}</b> - Proba: <b>{pred:.4f}</b>"

            elif self.explainer._case == "regression":
                contrib = data["contrib_sorted"]
                x_val = data["x_sorted"]
                var_dict = data["var_dict"]
                label_num = None
                if show_predict is True:
                    pred_value = self.local_pred(line[0])
                    if self.explainer.y_pred is not None:
                        if self.round_digit is None:
                            self.tuning_round_digit()
                        digit = self.round_digit
                    else:
                        digit = compute_digit_number(pred_value)
                    subtitle = f"Predict: <b>{round(pred_value, digit)}</b>"

            var_dict, x_val, contrib = self.get_selection(line, var_dict, x_val, contrib)
            var_dict, x_val, contrib = self.apply_mask_one_line(line, var_dict, x_val, contrib, label=label_num)
            # use label of each column
            if display_groups:
                var_dict = [self.explainer.features_dict[self.explainer.x_init_groups.columns[x]] for x in var_dict]
            else:
                var_dict = [self.explainer.features_dict[self.explainer.columns_dict[x]] for x in var_dict]
            if show_masked:
                var_dict, x_val, contrib = self.check_masked_contributions(
                    line, var_dict, x_val, contrib, label=label_num
                )
            # Filtering all negative or positive contrib if specify in mask
            exclusion = []
            if hasattr(self.explainer, "mask_params"):
                if self.explainer.mask_params["positive"] is True:
                    exclusion = np.where(np.array(contrib) < 0)[0].tolist()
                elif self.explainer.mask_params["positive"] is False:
                    exclusion = np.where(np.array(contrib) > 0)[0].tolist()
            exclusion.sort(reverse=True)
            for expl in exclusion:
                del var_dict[expl]
                del x_val[expl]
                del contrib[expl]

        fig = self.plot_bar_chart(
            line, var_dict, x_val, contrib, yaxis_max_label, subtitle, width, height, file_name, auto_open, zoom
        )
        return fig

    def contribution_plot(
        self,
        col,
        selection=None,
        label=-1,
        violin_maxf=10,
        max_points=2000,
        proba=True,
        width=900,
        height=600,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        contribution_plot method diplays a Plotly scatter or violin plot of a selected feature.
        It represents the contribution of the selected feature to the predicted value.
        This plot allows the user to understand how the value of a feature affects a prediction
        Type of plot (Violin/scatter) is automatically selected. It depends on the feature
        to be analyzed, the type of use case (regression / classification) and the presence of
        predicted values attribute.
        A sample is taken if the number of points to be displayed is too large
        Using col parameter, shapash user can specify the column num, name or column label of
        the feature
        contribution_plot tutorial offers many examples (please check tutorial part of this doc)
        Parameters
        ----------
        col: String or Int
            Name, label name or column number of the column whose contributions we want to plot
        selection: list (optional)
            Contains list of index, subset of the input DataFrame that we want to plot
        label: integer or string (default -1)
            If the label is of string type, check if it can be changed to integer to select the
            good dataframe object.
        violin_maxf: int (optional, default: 10)
            maximum number modality to plot violin. If the feature specified with col argument
            has more modalities than violin_maxf, a scatter plot will be choose
        max_points: int (optional, default: 2000)
            maximum number to plot in contribution plot. if input dataset is bigger than max_points,
            a sample limits the number of points to plot.
            nb: you can also limit the number using 'selection' parameter.
        proba: bool (optional, default: True)
            use predict_proba to color plot (classification case)
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 600)
            Plotly figure - layout height
        file_name: string (optional)
            File name to use to save the plotly bar chart. If None the bar chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the bar plot or not.
        zoom: bool (default=False)
            graph is currently zoomed
        Returns
        -------
        Plotly Figure Object
        Example
        --------
        >>> xpl.plot.contribution_plot(0)
        """

        if self.explainer._case == "classification":
            label_num, _, label_value = self.explainer.check_label_name(label)

        if not isinstance(col, (str, int)):
            raise ValueError("parameter col must be string or int.")
        if hasattr(self.explainer, "inv_features_dict"):
            col = self.explainer.inv_features_dict.get(col, col)
        col_is_group = self.explainer.features_groups and col in self.explainer.features_groups.keys()

        # Case where col is a group of features
        if col_is_group:
            contributions = self.explainer.contributions_groups
            col_label = self.explainer.features_dict[col]
            col_name = self.explainer.features_groups[col]  # Here col_name is actually a list of features
            col_value_count = self.explainer.features_desc[col]
        else:
            contributions = self.explainer.contributions
            col_id = self.explainer.check_features_name([col])[0]
            col_name = self.explainer.columns_dict[col_id]
            col_value_count = self.explainer.features_desc[col_name]

            if self.explainer.features_dict:
                col_label = self.explainer.features_dict[col_name]
            else:
                col_label = col_name

        list_ind, addnote = self.explainer.plot._subset_sampling(
            selection, max_points, None if col_is_group else col, col_value_count
        )

        col_value = None
        proba_values = None
        subtitle = None
        col_scale = None
        cmin = None
        cmax = None

        # Classification Case
        if self.explainer._case == "classification":
            subcontrib = contributions[label_num]
            if self.explainer.y_pred is not None:
                col_value = self.explainer._classes[label_num]
            subtitle = f"Response: <b>{label_value}</b>"
            # predict proba Color scale
            if proba and self.explainer.proba_values is not None:
                proba_values = self.explainer.proba_values.iloc[:, [label_num]]
                # Proba subset:
                proba_values = proba_values.loc[list_ind, :]
                col_scale, cmin, cmax = self.tuning_colorscale(proba_values, keep_90_pct=True)
            elif self.explainer.y_pred is not None:
                pred_values = self.explainer.y_pred.iloc[:, [label_num]]
                # Prediction subset:
                pred_values = pred_values.loc[list_ind, :]
                col_scale, cmin, cmax = self.tuning_colorscale(pred_values, keep_90_pct=True)

        # Regression Case - color scale
        elif self.explainer._case == "regression":
            subcontrib = contributions
            if self.explainer.y_pred is not None:
                col_scale, cmin, cmax = self.tuning_colorscale(self.explainer.y_pred.loc[list_ind], keep_90_pct=True)

        # Subset
        if self.explainer.postprocessing_modifications:
            feature_values = self.explainer.x_contrib_plot.loc[list_ind, col_name]
        else:
            feature_values = self.explainer.x_init.loc[list_ind, col_name]

        if col_is_group:
            feature_values = project_feature_values_1d(
                feature_values,
                col,
                self.explainer.x_init,
                self.explainer.x_encoded,
                self.explainer.preprocessing,
                features_dict=self.explainer.features_dict,
            )
            contrib = subcontrib.loc[list_ind, col].to_frame()
            if self.explainer.features_imp is None:
                self.explainer.compute_features_import()
            features_imp = (
                self.explainer.features_imp
                if isinstance(self.explainer.features_imp, pd.Series)
                else self.explainer.features_imp[0]
            )
            top_features_of_group = (
                features_imp.loc[self.explainer.features_groups[col]].sort_values(ascending=False)[:4].index
            )  # Displaying top 4 features
            metadata = {
                self.explainer.features_dict[f_name]: self.explainer.x_init[f_name] for f_name in top_features_of_group
            }
            text_group = "Features values were projected on the x axis using t-SNE"
            # if group don't show addnote, if not, it's too long
            # if addnote is not None:
            #    addnote = add_text([addnote, text_group], sep=' - ')
            # else:
            addnote = text_group
        else:
            contrib = subcontrib.loc[list_ind, col_name].to_frame()
            metadata = None
        feature_values = feature_values.to_frame()

        if self.explainer.y_pred is not None:
            y_pred = self.explainer.y_pred.loc[list_ind]
            # Add labels if exist
            if self.explainer._case == "classification" and self.explainer.label_dict is not None:
                y_pred = y_pred.map(lambda x: self.explainer.label_dict[x])
                col_value = self.explainer.label_dict[col_value]
            # round predict
            elif self.explainer._case == "regression":
                if self.round_digit is None:
                    self.tuning_round_digit()
                y_pred = y_pred.map(lambda x: round(x, self.round_digit))
        else:
            y_pred = None

        # selecting the best plot : Scatter, Violin?
        if col_value_count > violin_maxf:
            fig = self.plot_scatter(
                feature_values,
                contrib,
                col_label,
                y_pred,
                proba_values,
                col_value,
                col_scale,
                cmin,
                cmax,
                metadata,
                addnote,
                subtitle,
                width,
                height,
                file_name,
                auto_open,
                zoom,
            )
        else:
            fig = self.plot_violin(
                feature_values,
                contrib,
                col_label,
                y_pred,
                proba_values,
                col_value,
                col_scale,
                cmin,
                cmax,
                addnote,
                subtitle,
                width,
                height,
                file_name,
                auto_open,
                zoom,
            )

        return fig

[docs]    def features_importance(
        self,
        max_features=20,
        selection=None,
        label=-1,
        group_name=None,
        display_groups=True,
        force=False,
        width=900,
        height=500,
        file_name=None,
        auto_open=False,
        zoom=False,
    ):
        """
        features_importance display a plotly features importance plot.
        in Multiclass Case, this features_importance focus on a label value.
        User specifies the label value using label parameter.
        the selection parameter allows the user to compare a subset to the global features
        importance
        features_importance tutorial offers several examples
         (please check tutorial part of this doc)
        Parameters
        ----------
        max_features: int (optional, default 20)
            this argument limit the number of hbar in features importance plot
            if max_features is 20, plot selects the 20 most important features
        selection: list (optional, default None)
            This argument allows to represent the importance calculated with a subset.
            Subset features importance is compared to global in the plot
            Argument must contains list of index, subset of the input DataFrame that we want to plot
        label: integer or string (default -1)
            If the label is of string type, check if it can be changed to integer to select the
            good dataframe object.
        group_name : str (optional, default None)
            Allows to display the features importance of the variables that are grouped together
            inside a group of features.
            This parameter is only available if the SmartExplainer object has been compiled using
            the features_groups optional parameter and should correspond to a key of
            features_groups dictionary.
        display_groups : bool (default True)
            If groups of features are declared in SmartExplainer object, this parameter allows to
            specify whether or not to display them.
        force: bool (optional, default False)
            force == True, force the compute features importance if it's already done
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 500)
            Plotly figure - layout height
        file_name: string (optional)
            File name to use to save the plotly bar chart. If None the bar chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the bar plot or not.
        zoom: bool (default=False)
            graph is currently zoomed
        Returns
        -------
        Plotly Figure Object
        Example
        --------
        >>> xpl.plot.features_importance()
        """
        self.explainer.compute_features_import(force=force)
        subtitle = None
        title = "Features Importance"
        display_groups = self.explainer.features_groups is not None and display_groups
        if display_groups:
            if group_name:  # Case where we have groups of features and we want to display only features inside a group
                if group_name not in self.explainer.features_groups.keys():
                    raise ValueError(
                        f"group_name parameter : {group_name} is not in features_groups keys. "
                        f"Possible values are : {list(self.explainer.features_groups.keys())}"
                    )
                title += f" - {truncate_str(self.explainer.features_dict.get(group_name), 20)}"
                if isinstance(self.explainer.features_imp, list):
                    features_importance = [
                        label_feat_imp.loc[label_feat_imp.index.isin(self.explainer.features_groups[group_name])]
                        for label_feat_imp in self.explainer.features_imp
                    ]
                else:
                    features_importance = self.explainer.features_imp.loc[
                        self.explainer.features_imp.index.isin(self.explainer.features_groups[group_name])
                    ]
                contributions = self.explainer.contributions
            else:
                features_importance = self.explainer.features_imp_groups
                contributions = self.explainer.contributions_groups
        else:
            features_importance = self.explainer.features_imp
            contributions = self.explainer.contributions

        # classification
        if self.explainer._case == "classification":
            label_num, _, label_value = self.explainer.check_label_name(label)
            global_feat_imp = features_importance[label_num].tail(max_features)
            if selection is not None:
                subset_feat_imp = self.explainer.backend.get_global_features_importance(
                    contributions=contributions[label_num], explain_data=self.explainer.explain_data, subset=selection
                )
            else:
                subset_feat_imp = None
            subtitle = f"Response: <b>{label_value}</b>"
        # regression
        elif self.explainer._case == "regression":
            global_feat_imp = features_importance.tail(max_features)
            if selection is not None:
                subset_feat_imp = self.explainer.backend.get_global_features_importance(
                    contributions=contributions, explain_data=self.explainer.explain_data, subset=selection
                )
            else:
                subset_feat_imp = None
        addnote = ""
        if subset_feat_imp is not None:
            subset_feat_imp = subset_feat_imp.reindex(global_feat_imp.index)
            subset_feat_imp.index = subset_feat_imp.index.map(self.explainer.features_dict)
            if subset_feat_imp.dropna().shape[0] == 0:
                raise ValueError("selection argument doesn't return any row")
            subset_len = len(selection)
            total_len = self.explainer.x_init.shape[0]
            addnote = add_text(
                [addnote, f"Subset length: {subset_len} ({int(np.round(100 * subset_len / total_len))}%)"], sep=" - "
            )
        if self.explainer.x_init.shape[1] >= max_features:
            addnote = add_text([addnote, f"Total number of features: {int(self.explainer.x_init.shape[1])}"], sep=" - ")

        global_feat_imp.index = global_feat_imp.index.map(self.explainer.features_dict)
        if display_groups:
            # Bold font for groups of features
            global_feat_imp.index = [
                (
                    "<b>" + str(f)
                    if self.explainer.inv_features_dict.get(f) in self.explainer.features_groups.keys()
                    else str(f)
                )
                for f in global_feat_imp.index
            ]
            if subset_feat_imp is not None:
                subset_feat_imp.index = [
                    (
                        "<b>" + str(f)
                        if self.explainer.inv_features_dict.get(f) in self.explainer.features_groups.keys()
                        else str(f)
                    )
                    for f in subset_feat_imp.index
                ]

        fig = self.plot_features_import(
            global_feat_imp, subset_feat_imp, title, addnote, subtitle, width, height, file_name, auto_open, zoom
        )
        return fig

    def plot_line_comparison(
        self,
        index,
        feature_values,
        contributions,
        predictions=None,
        dict_features=None,
        subtitle=None,
        width=900,
        height=550,
        file_name=None,
        auto_open=False,
    ):
        """
        Build the plotly comparison figure: one line per individual, showing its
        contribution for each displayed feature.

        Parameters
        ----------
        index: list
            Identifiers of the individuals to compare. One trace is drawn per entry.
        feature_values: list
            Labels of the features, in the same order as ``contributions``.
        contributions: sequence
            One entry per feature; ``contributions[k][i]`` is the contribution of the
            k-th feature for the i-th individual of ``index``.
        predictions: list
            ``predictions[i]`` holds the feature values of the i-th individual and is
            looked up with ``dict_features[label]``. Required as soon as ``index``
            is not empty.
        dict_features: dict
            Maps a feature label (as found in ``feature_values``) to the key used in
            each element of ``predictions``.
        subtitle: string (default : None)
            Subtitle to display under the title.
        width: int (default: 900)
            Plotly figure - layout width
        height: int (default: 550)
            Plotly figure - layout height.
        file_name: string (optional)
            File name to use to save the plotly scatter chart. If None the scatter chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the scatter plot or not.

        Returns
        -------
        Plotly Figure Object
            Plot of the contributions of individuals, feature by feature.
        """
        style = self._style_dict
        title_cfg = copy.deepcopy(style["dict_title"])
        xaxis_cfg = copy.deepcopy(style["dict_xaxis"])
        yaxis_cfg = copy.deepcopy(style["dict_yaxis"])
        top_margin = 80

        nb_individuals = len(index)
        if nb_individuals == 0:
            warnings.warn("No individuals matched", UserWarning)
            title_cfg["text"] = "Compare plot - <b>No Matching Reference Entry</b>"
        else:
            if nb_individuals < 2:
                warnings.warn("Comparison needs at least 2 individuals", UserWarning)
            else:
                # The x axis title is only set for a real comparison (2+ individuals).
                xaxis_cfg["text"] = "Contributions"
            title_cfg["text"] = "Compare plot - index : " + " ; ".join(
                "<b>" + str(ident) + "</b>" for ident in index
            )

        yaxis_cfg["text"] = None

        if subtitle is not None:
            # Leave room for the subtitle, proportionally to the figure height.
            top_margin += 15 * height / 275
            subtitle_html = f"<span style='font-size: 12px;'><br />{truncate_str(subtitle, 200)}</span>"
            title_cfg["text"] = truncate_str(title_cfg["text"], 120) + subtitle_html

        layout = go.Layout(
            template="none",
            title=title_cfg,
            xaxis_title=xaxis_cfg,
            yaxis_title=yaxis_cfg,
            yaxis_type="category",
            width=width,
            height=height,
            hovermode="closest",
            legend=dict(x=1, y=1),
            margin={"l": 150, "r": 20, "t": top_margin, "b": 70},
        )

        # Pair each feature label with its vector of contributions (one value per individual).
        feature_rows = list(zip(contributions, feature_values))
        palette = copy.deepcopy(style["dict_compare_colors"])

        traces = []
        for i, id_i in enumerate(index):
            individual_values = predictions[i]
            xs, labels, hovers = [], [], []

            for contrib, feat in feature_rows:
                xs.append(contrib[i])
                labels.append("<b>" + str(feat) + "</b>")
                raw_value = individual_values[dict_features[feat]]
                hovers.append(
                    f"Id: <b>{add_line_break(id_i, 40, 160)}</b>"
                    f"<br /><b>{add_line_break(feat, 40, 160)}</b> <br />"
                    f"Contribution: {contrib[i]:.4f} <br />Value: "
                    f"{add_line_break(raw_value, 40, 160)!s}"
                )

            trace = go.Scatter(
                x=xs,
                y=labels,
                mode="lines+markers",
                showlegend=True,
                name=f"Id: <b>{index[i]}</b>",
                hoverinfo="text",
                hovertext=hovers,
                # Colors are reused cyclically when there are more individuals than colors.
                marker={"color": palette[i % len(palette)]},
            )
            traces.append(trace)

        fig = go.Figure(data=traces, layout=layout)
        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)

        if file_name is not None:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

[docs]    def compare_plot(
        self,
        index=None,
        row_num=None,
        label=None,
        max_features=20,
        width=900,
        height=550,
        show_predict=True,
        file_name=None,
        auto_open=True,
    ):
        """
        Plotly comparison plot of several individuals' contributions. Plots contributions feature by feature.
        Allows to see the differences of contributions between two or more individuals,
        with each individual represented by a unique line.
        Parameters
        ----------
        index: list
            1st option to select individual rows.
            Int list of index referencing rows.
        row_num: list
            2nd option to select individual rows.
            int list corresponding to the row numbers of individuals (starting at 0).
        label: int or string (default: None)
            If the label is of string type, check if it can be changed to integer to select the
            good dataframe object.
        max_features: int (optional, default: 20)
            Number of contributions to show.
            If greater than the total of features, shows all.
        width: int (default: 900)
            Plotly figure - layout width.
        height: int (default: 550)
            Plotly figure - layout height.
        show_predict: boolean (default: True)
            Shows predict or predict_proba value.
        file_name: string (optional)
            File name to use to save the plotly bar chart. If None the bar chart will not be saved.
        auto_open: boolean (optional)
            Indicates whether to open the bar plot or not.
        Returns
        -------
        Plotly Figure Object
            Comparison plot of the contributions of the different individuals.
        Example
        -------
        >>> xpl.plot.compare_plot(row_num=[0, 1, 2])
        """
        # Checking input is okay
        if sum(arg is not None for arg in [row_num, index]) != 1:
            raise ValueError("You have to specify just one of these arguments: index, row_num")
        # Getting indexes in a list
        line_reference = []
        if index is not None:
            for ident in index:
                if ident in self.explainer.x_init.index:
                    line_reference.append(ident)

        elif row_num is not None:
            line_reference = [
                self.explainer.x_init.index.values[row_nb_reference]
                for row_nb_reference in row_num
                if self.explainer.x_init.index.values[row_nb_reference] in self.explainer.x_init.index
            ]

        subtitle = ""
        if len(line_reference) < 1:
            raise ValueError("No matching entry for index")

        # Classification case
        if self.explainer._case == "classification":
            if label is None:
                label = -1

            label_num, _, label_value = self.explainer.check_label_name(label)
            contrib = self.explainer.contributions[label_num]

            if show_predict:
                preds = [self.local_pred(line, label_num) for line in line_reference]
                subtitle = (
                    f"Response: <b>{label_value}</b> - "
                    + "Probas: "
                    + " ; ".join(
                        [str(id) + ": <b>" + str(round(proba, 2)) + "</b>" for proba, id in zip(preds, line_reference)]
                    )
                )

        # Regression case
        elif self.explainer._case == "regression":
            contrib = self.explainer.contributions

            if show_predict:
                preds = [self.local_pred(line) for line in line_reference]
                subtitle = "Predictions: " + " ; ".join(
                    [str(id) + ": <b>" + str(round(pred, 2)) + "</b>" for id, pred in zip(line_reference, preds)]
                )

        new_contrib = list()
        for ident in line_reference:
            new_contrib.append(contrib.loc[ident])
        new_contrib = np.array(new_contrib).T

        # Well labels if available
        feature_values = [0] * len(contrib.columns)
        if hasattr(self.explainer, "columns_dict"):
            for i, name in enumerate(contrib.columns):
                feature_name = self.explainer.features_dict[name]
                feature_values[i] = feature_name

        preds = [self.explainer.x_init.loc[id] for id in line_reference]
        dict_features = self.explainer.inv_features_dict

        iteration_list = list(zip(new_contrib, feature_values))
        iteration_list.sort(key=lambda x: maximum_difference_sort_value(x), reverse=True)
        iteration_list = iteration_list[:max_features]
        iteration_list = iteration_list[::-1]
        new_contrib, feature_values = list(zip(*iteration_list))

        fig = self.plot_line_comparison(
            line_reference,
            feature_values,
            new_contrib,
            predictions=preds,
            dict_features=dict_features,
            width=width,
            height=height,
            subtitle=subtitle,
            file_name=file_name,
            auto_open=auto_open,
        )

        return fig

    def _plot_interactions_scatter(self, x_name, y_name, col_name, x_values, y_values, col_values, col_scale):
        """
        Build the scatter figure used by the interactions plots.

        The color is treated as discrete when the first color value is a string,
        and as continuous otherwise.

        Parameters
        ----------
        x_name : str
            Name of the variable used as the x axis
        y_name : str
            Name of the variable used as the y axis
        col_name : str
            Name of the variable used as the color attribute
        x_values : pd.DataFrame
            Values of the points on the x axis as a 1 column DataFrame
        y_values : pd.DataFrame
            Values of the points on the y axis as a 1 column DataFrame
        col_values : pd.DataFrame
            Values of the color of the points as a 1 column DataFrame
        col_scale : list
            Color scale, only used when the color values are not strings

        Returns
        -------
        go.Figure
        """
        color_data = col_values.values.flatten()
        points = pd.DataFrame(
            {
                x_name: x_values.values.flatten(),
                y_name: y_values.values.flatten(),
                col_name: color_data,
            }
        )

        scatter_kwargs = {"x": x_name, "y": y_name, "color": col_name}
        if isinstance(color_data[0], str):
            # Categorical color variable: use the discrete palette of the current style.
            scatter_kwargs["color_discrete_sequence"] = self._style_dict["interactions_discrete_colors"]
        else:
            scatter_kwargs["color_continuous_scale"] = col_scale

        fig = px.scatter(points, **scatter_kwargs)
        fig.update_traces(mode="markers")

        return fig

    def _plot_interactions_violin(self, x_name, y_name, col_name, x_values, y_values, col_values, col_scale):
        """
        Build the violin figure used by the interactions plots.

        One violin is drawn per distinct x value, then the traces of the
        interactions scatter plot are added on top of the violins.

        Parameters
        ----------
        x_name : str
            Name of the variable used as the x axis
        y_name : str
            Name of the variable used as the y axis
        col_name : str
            Name of the variable used as the color attribute
        x_values : pd.DataFrame
            Values of the points on the x axis as a 1 column DataFrame
        y_values : pd.DataFrame
            Values of the points on the y axis as a 1 column DataFrame
        col_values : pd.DataFrame
            Values of the color of the points as a 1 column DataFrame
        col_scale : list
            color scale

        Returns
        -------
        go.Figure
        """
        fig = go.Figure()

        # Distinct x values, sorted: each one gets its own violin.
        categories = sorted(pd.unique(x_values.values.flatten()))

        for category in categories:
            in_category = x_values.iloc[:, 0] == category
            violin = go.Violin(
                x=x_values.loc[in_category].values.flatten(),
                y=y_values.loc[in_category].values.flatten(),
                line_color=self._style_dict["violin_default"],
                showlegend=False,
                meanline_visible=True,
                scalemode="count",
            )
            fig.add_trace(violin)

        # Overlay the individual points, colored by the second variable.
        points_fig = self._plot_interactions_scatter(
            x_name=x_name,
            y_name=y_name,
            col_name=col_name,
            x_values=x_values,
            y_values=y_values,
            col_values=col_values,
            col_scale=col_scale,
        )
        for point_trace in points_fig.data:
            fig.add_trace(point_trace)

        fig.update_layout(
            autosize=False,
            hovermode="closest",
            violingap=0.05,
            violingroupgap=0,
            violinmode="overlay",
            xaxis_type="category",
        )

        nb_categories = len(categories)
        fig.update_xaxes(range=[-0.6, nb_categories - 0.4])

        return fig

    def _update_interactions_fig(self, fig, col_name1, col_name2, addnote, width, height, file_name, auto_open):
        """
        Apply the interactions plot layout (title, axes, markers, size) to a plotly figure.

        Parameters
        ----------
        fig : go.Figure
            Figure to update; it is modified and returned.
        col_name1 : str
            Name of the first column whose contributions we want to plot
        col_name2 : str
            Name of the second column whose contributions we want to plot
        addnote : str
            Text to be added to the figure title
        width : Int
            Plotly figure - layout width
        height : Int
            Plotly figure - layout height
        file_name: string (optional)
            File name to use to save the plotly chart. If empty or None the chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the plot or not.

        Returns
        -------
        go.Figure
        """
        style = self._style_dict

        last_trace_hides_legend = fig.data[-1]["showlegend"] is False
        if last_trace_hides_legend:  # Case where col2 is not categorical
            fig.layout.coloraxis.colorscale = style["interactions_col_scale"]
        else:
            fig.update_layout(legend=dict(title=dict(text=col_name2)))

        title_text = f"<b>{truncate_str(col_name1)} and {truncate_str(col_name2)}</b> shap interaction values"
        if addnote:
            note_html = add_text([addnote], sep=" - ")
            title_text += f"<span style='font-size: 12px;'><br />{note_html}</span>"

        title_cfg = copy.deepcopy(style["dict_title"])
        title_cfg["text"] = title_text

        xaxis_cfg = copy.deepcopy(style["dict_xaxis"])
        xaxis_cfg["text"] = truncate_str(col_name1, 110)

        yaxis_cfg = copy.deepcopy(style["dict_yaxis"])
        yaxis_cfg["text"] = "Shap interaction value"

        marker_cfg = {"size": 8, "opacity": 0.8, "line": {"width": 0.8, "color": "white"}}
        fig.update_traces(marker=marker_cfg)

        fig.update_layout(
            coloraxis=dict(colorbar={"title": {"text": col_name2}}),
            yaxis_title=yaxis_cfg,
            title=title_cfg,
            template="none",
            width=width,
            height=height,
            xaxis_title=xaxis_cfg,
            hovermode="closest",
        )

        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)

        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

    def _select_indices_interactions_plot(self, selection, max_points):
        """
        Method used for sampling indices.
        Parameters
        ----------
        selection : list
            Contains list of index, subset of the input DataFrame that we want to plot
        max_points : int
            Maximum number to plot in contribution plot. if input dataset is bigger than max_points,
            a sample limits the number of points to plot.
            nb: you can also limit the number using 'selection' parameter.
        Returns
        -------
        list_ind : list
            List of indices to select
        addnote : str
            Text to inform the user the selection that has been done.
        """
        # Sampling
        addnote = None
        if selection is None:
            # interaction_selection attribute is used to store already computed indices of interaction_values
            if hasattr(self, "interaction_selection"):
                list_ind = self.interaction_selection
            elif self.explainer.x_init.shape[0] <= max_points:
                list_ind = self.explainer.x_init.index.tolist()
            else:
                list_ind = random.sample(self.explainer.x_init.index.tolist(), max_points)
                addnote = "Length of random Subset : "
        elif isinstance(selection, list):
            if len(selection) <= max_points:
                list_ind = selection
                addnote = "Length of user-defined Subset : "
            elif hasattr(self, "interaction_selection"):
                if set(selection).issubset(set(self.interaction_selection)):
                    list_ind = self.interaction_selection
            else:
                list_ind = random.sample(selection, max_points)
                addnote = "Length of random Subset : "
        else:
            ValueError("parameter selection must be a list")
        self.interaction_selection = list_ind

        return list_ind, addnote

    def interactions_plot(
        self,
        col1,
        col2,
        selection=None,
        violin_maxf=10,
        max_points=500,
        width=900,
        height=600,
        file_name=None,
        auto_open=False,
    ):
        """
        Diplays a Plotly scatter plot or violin plot of two selected features and their combined
        contributions for each of their values.
        This plot allows the user to understand how the different combinations of values of the
        two selected features influence the importance of the two features in the model output.
        A sample is taken if the number of points to be displayed is too large
        Parameters
        ----------
        col1: String or Int
            Name, label name or column number of the first column whose contributions we want to plot
        col2: String or Int
            Name, label name or column number of the second column whose contributions we want to plot
        selection: list (optional)
            Contains list of index, subset of the input DataFrame that we want to plot
        violin_maxf: int (optional, default: 10)
            maximum number modality to plot violin. If the feature specified with col argument
            has more modalities than violin_maxf, a scatter plot will be choose
        max_points: int (optional, default: 2000)
            maximum number of points to plot in contribution plot. if input dataset is bigger than
            max_points, a sample limits the number of points to plot.
            nb: you can also limit the number using 'selection' parameter.
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 600)
            Plotly figure - layout height
        file_name: string (optional)
            File name to use to save the plotly bar chart. If None the bar chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the bar plot or not.
        Returns
        -------
        Plotly Figure Object
        Example
        --------
        >>> xpl.plot.interactions_plot(0, 1)
        """

        if not (isinstance(col1, (str, int)) or isinstance(col2, (str, int))):
            raise ValueError("parameters col1 and col2 must be string or int.")

        col_id1 = self.explainer.check_features_name([col1])[0]
        col_name1 = self.explainer.columns_dict[col_id1]

        col_id2 = self.explainer.check_features_name([col2])[0]
        col_name2 = self.explainer.columns_dict[col_id2]

        col_value_count1 = self.explainer.features_desc[col_name1]

        list_ind, addnote = self._select_indices_interactions_plot(selection=selection, max_points=max_points)

        if addnote is not None:
            addnote = add_text(
                [addnote, f"{len(list_ind)} ({int(np.round(100 * len(list_ind) / self.explainer.x_init.shape[0]))}%)"],
                sep="",
            )

        # Subset
        if self.explainer.postprocessing_modifications:
            feature_values1 = self.explainer.x_contrib_plot.loc[list_ind, col_name1].to_frame()
            feature_values2 = self.explainer.x_contrib_plot.loc[list_ind, col_name2].to_frame()
        else:
            feature_values1 = self.explainer.x_init.loc[list_ind, col_name1].to_frame()
            feature_values2 = self.explainer.x_init.loc[list_ind, col_name2].to_frame()

        interaction_values = self.explainer.get_interaction_values(selection=list_ind)[:, col_id1, col_id2]
        if col_id1 != col_id2:
            interaction_values = interaction_values * 2

        # add break line to X label if necessary
        max_len_by_row = max([round(50 / self.explainer.features_desc[feature_values1.columns.values[0]]), 8])
        feature_values1.iloc[:, 0] = feature_values1.iloc[:, 0].apply(
            add_line_break,
            args=(
                max_len_by_row,
                120,
            ),
        )

        # selecting the best plot : Scatter, Violin?
        if col_value_count1 > violin_maxf:
            fig = self._plot_interactions_scatter(
                x_name=col_name1,
                y_name="Shap interaction value",
                col_name=col_name2,
                x_values=feature_values1,
                y_values=pd.DataFrame(interaction_values, index=feature_values1.index),
                col_values=feature_values2,
                col_scale=self._style_dict["interactions_col_scale"],
            )
        else:
            fig = self._plot_interactions_violin(
                x_name=col_name1,
                y_name="Shap interaction value",
                col_name=col_name2,
                x_values=feature_values1,
                y_values=pd.DataFrame(interaction_values, index=feature_values1.index),
                col_values=feature_values2,
                col_scale=self._style_dict["interactions_col_scale"],
            )

        self._update_interactions_fig(
            fig=fig,
            col_name1=col_name1,
            col_name2=col_name2,
            addnote=addnote,
            width=width,
            height=height,
            file_name=file_name,
            auto_open=auto_open,
        )

        return fig

[docs]    def top_interactions_plot(
        self,
        nb_top_interactions=5,
        selection=None,
        violin_maxf=10,
        max_points=500,
        width=900,
        height=600,
        file_name=None,
        auto_open=False,
    ):
        """
        Displays a dynamic plot with the `nb_top_interactions` most important interactions existing
        between two variables.
        The most important interactions are determined computing the sum of all absolute shap interactions
        values between all existing pairs of variables.
        A button allows to select and display the corresponding features values and their shap contribution values.
        Parameters
        ----------
        nb_top_interactions : int
            Number of top interactions to display.
        selection : list (optional)
            Contains list of index, subset of the input DataFrame that we want to plot
        violin_maxf : int (optional, default: 10)
            maximum number modality to plot violin. If the feature specified with col argument
            has more modalities than violin_maxf, a scatter plot will be choose
        max_points : int (optional, default: 500)
            maximum number to plot in contribution plot. if input dataset is bigger than max_points,
            a sample limits the number of points to plot.
            nb: you can also limit the number using 'selection' parameter.
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 600)
            Plotly figure - layout height
        file_name: string (optional)
            File name to use to save the plotly bar chart. If None the bar chart will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the bar plot or not.
        Returns
        -------
        go.Figure
        Example
        --------
        >>> xpl.plot.top_interactions_plot()
        """

        list_ind, addnote = self._select_indices_interactions_plot(selection=selection, max_points=max_points)

        interaction_values = self.explainer.get_interaction_values(selection=list_ind)

        sorted_top_features_indices = compute_sorted_variables_interactions_list_indices(interaction_values)

        indices_to_plot = sorted_top_features_indices[:nb_top_interactions]
        interactions_indices_traces_mapping = []
        fig = go.Figure()
        for i, ids in enumerate(indices_to_plot):
            id0, id1 = ids

            fig_one_interaction = self.interactions_plot(
                col1=self.explainer.columns_dict[id0],
                col2=self.explainer.columns_dict[id1],
                selection=selection,
                violin_maxf=violin_maxf,
                max_points=max_points,
                width=width,
                height=height,
                file_name=None,
                auto_open=False,
            )

            # The number of traces of each figure is stored
            interactions_indices_traces_mapping.append(len(fig_one_interaction.data))

            for trace in fig_one_interaction.data:
                trace.visible = True if i == 0 else False
                fig.add_trace(trace=trace)

        def generate_title_dict(col_name1, col_name2, addnote):
            title = f"<b>{truncate_str(col_name1)} and {truncate_str(col_name2)}</b> shap interaction values"
            if addnote:
                title += f"<span style='font-size: 12px;'><br />{add_text([addnote], sep=' - ')}</span>"
            dict_t = copy.deepcopy(self._style_dict["dict_title"])
            dict_t.update({"text": title, "y": 0.88, "x": 0.5, "xanchor": "center", "yanchor": "top"})
            return dict_t

        fig.layout.coloraxis.colorscale = self._style_dict["interactions_col_scale"]
        fig.update_layout(
            xaxis_title=self.explainer.columns_dict[sorted_top_features_indices[0][0]],
            yaxis_title="Shap interaction value",
            updatemenus=[
                dict(
                    active=0,
                    buttons=list(
                        [
                            dict(
                                label=f"{self.explainer.columns_dict[i]} - {self.explainer.columns_dict[j]}",
                                method="update",
                                args=[
                                    {
                                        "visible": [
                                            True if i == id_trace else False
                                            for i, x in enumerate(interactions_indices_traces_mapping)
                                            for _ in range(x)
                                        ]
                                    },
                                    {
                                        "xaxis": {
                                            "title": {
                                                **{"text": self.explainer.columns_dict[i]},
                                                **self._style_dict["dict_xaxis"],
                                            }
                                        },
                                        "legend": {"title": {"text": self.explainer.columns_dict[j]}},
                                        "coloraxis": {
                                            "colorbar": {"title": {"text": self.explainer.columns_dict[j]}},
                                            "colorscale": fig.layout.coloraxis.colorscale,
                                        },
                                        "title": generate_title_dict(
                                            self.explainer.columns_dict[i], self.explainer.columns_dict[j], addnote
                                        ),
                                    },
                                ],
                            )
                            for id_trace, (i, j) in enumerate(indices_to_plot)
                        ]
                    ),
                    direction="down",
                    pad={"r": 10, "t": 10},
                    showactive=True,
                    x=0.37,
                    xanchor="left",
                    y=1.25,
                    yanchor="top",
                )
            ],
            annotations=[
                dict(
                    text=f"Sorted top {len(indices_to_plot)} SHAP interaction Variables :",
                    x=0,
                    xref="paper",
                    y=1.2,
                    yref="paper",
                    align="left",
                    showarrow=False,
                )
            ],
        )

        self._update_interactions_fig(
            fig=fig,
            col_name1=self.explainer.columns_dict[sorted_top_features_indices[0][0]],
            col_name2=self.explainer.columns_dict[sorted_top_features_indices[0][1]],
            addnote=addnote,
            width=width,
            height=height,
            file_name=None,
            auto_open=False,
        )

        fig.update_layout(title={"y": 0.88, "x": 0.5, "xanchor": "center", "yanchor": "top"})

        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

    def correlations(
        self,
        df=None,
        optimized=False,
        max_features=20,
        features_to_hide=None,
        facet_col=None,
        how="phik",
        width=900,
        height=500,
        degree=2.5,
        decimals=2,
        file_name=None,
        auto_open=False,
    ):
        """
        Correlations matrix heatmap plot.

        The correlation matrix is computed by `compute_corr` with the method given by `how`,
        restricted to the top correlated features and reordered so that clusters of correlated
        features are displayed next to each other.
        Neither `df` nor `features_to_hide` is modified by this method.

        Parameters
        ----------
        df : pd.DataFrame, optional
            DataFrame for which we want to compute correlations. Will use x_init by default.
        optimized : boolean, optional
            True if we want to potentially accelerate the computation of the correlation matrix by reducing the
            length of the data (sample of 10000 rows) and the number of modalities per categorical column
            (200 most frequent, the others are grouped in "Other").
        max_features : int (default: 20)
            Max number of features to show on the matrix.
        features_to_hide : list (optional)
            List of features that will not appear on the graph
        facet_col : str (optional)
            Name of the column used to split the graph in two (or more) plots. One correlation
            subplot will be computed for each value of this column.
        how : str (default: 'phik')
            Correlation method used. 'phik' or 'pearson' are possible values. 'phik' is used by default.
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 500)
            Plotly figure - layout height
        degree : float, optional, (default 2.5)
            degree applied on the correlation matrix in order to focus more or less the clustering
            on strong correlated variables
        decimals : int, optional, (default 2)
            number of decimals to plot for correlation values
        file_name: string (optional)
            File name to use to save the plotly figure. If None the figure will not be saved.
        auto_open: Boolean (optional)
            Indicate whether to open the saved plot or not.

        Returns
        -------
        go.Figure

        Example
        --------
        >>> xpl.plot.correlations()
        """

        def cluster_corr(corr, degree, inplace=False):
            """
            Rearranges the correlation matrix, corr, so that groups of highly
            correlated variables are next to eachother

            Parameters
            ----------
            corr : pandas.DataFrame or numpy.ndarray
                a NxN correlation matrix
            degree : float
                degree applied on the correlation matrix in order to focus more or less the clustering
                on strong correlated variables
            inplace : bool, optional
                when False (default) the reordering is applied to a copy of corr

            Returns
            -------
            pandas.DataFrame or numpy.ndarray
                a NxN correlation matrix with the columns and rows rearranged
            """

            if corr.shape[0] < 2:
                return corr

            values = np.asarray(corr, dtype=float)
            with np.errstate(invalid="ignore"):
                powered = values**degree
            # A negative correlation raised to a fractional degree is NaN, and linkage refuses
            # non-finite distances. Only those entries are replaced, by the sign-preserving power,
            # so every case that used to work gives the same ordering as before.
            powered = np.where(np.isnan(powered), np.sign(values) * np.abs(values) ** degree, powered)
            # Remaining NaN come from undefined correlations: treat them as "no correlation"
            powered = np.nan_to_num(powered)

            pairwise_distances = sch.distance.pdist(powered)
            linkage = sch.linkage(pairwise_distances, method="complete")
            cluster_distance_threshold = pairwise_distances.max() / 2
            idx_to_cluster_array = sch.fcluster(linkage, cluster_distance_threshold, criterion="distance")
            idx = np.argsort(idx_to_cluster_array)

            if not inplace:
                corr = corr.copy()

            if isinstance(corr, pd.DataFrame):
                return corr.iloc[idx, :].T.iloc[idx, :]

            return corr[idx, :][:, idx]

        def shorten_name(name, k=6):
            """Keep the first k + k // 2 and last k - k // 2 characters of names longer than 2 * k."""
            name = str(name)
            if len(name) <= 2 * k:
                return name
            # Explicit slicing: str.replace would substitute every occurrence of the middle part
            return name[: k + k // 2] + "..." + name[-k + k // 2 :]

        def build_heatmap(corr, features, labels):
            """Heatmap trace of corr restricted to features, displayed with the shortened labels."""
            return go.Heatmap(
                z=corr.loc[features, features].round(decimals).values,
                x=labels,
                y=labels,
                coloraxis="coloraxis",
                text=[
                    [
                        f"Feature 1: {self.explainer.features_dict.get(y, y)} <br />"
                        f"Feature 2: {self.explainer.features_dict.get(x, x)}"
                        for x in features
                    ]
                    for y in features
                ],
                hovertemplate=hovertemplate,
            )

        # Work on a private list so that the caller's features_to_hide is never extended
        hidden_features = [] if features_to_hide is None else list(features_to_hide)
        if facet_col:
            hidden_features.append(facet_col)

        if df is None:
            # Use x_init by default
            df = self.explainer.x_init.copy()
        elif optimized:
            # The optimisation below overwrites columns: do it on a copy of the caller's data
            df = df.copy()

        if optimized:
            categorical_columns = df.select_dtypes(include=["object", "category"]).columns

            for col in categorical_columns:
                top_categories = df[col].value_counts().nlargest(200).index
                df[col] = df[col].where(df[col].isin(top_categories), other="Other")

            if len(df) > 10000:
                df = df.sample(n=10000, random_state=1)

        compute_method = how

        hovertemplate = "<b>%{text}<br />Correlation: %{z}</b><extra></extra>"

        list_features = []
        if facet_col:
            # Missing values are ignored so that the number of subplots matches the number of titles
            facet_col_values = sorted(df[facet_col].dropna().unique(), reverse=True)
            fig = make_subplots(
                rows=1,
                cols=len(facet_col_values),
                subplot_titles=[f"{t} correlation" for t in facet_col_values],
                horizontal_spacing=0.15,
            )
            list_features_shorten = []
            # Used for the Shapash report to get train then test set
            for i, col_v in enumerate(facet_col_values):
                corr = compute_corr(df.loc[df[facet_col] == col_v].drop(hidden_features, axis=1), compute_method)

                # Keep the same list of features for each subplot
                if len(list_features) == 0:
                    top_features = compute_top_correlations_features(corr=corr, max_features=max_features)
                    corr = cluster_corr(corr.loc[top_features, top_features], degree=degree)
                    list_features = list(corr.columns)
                    list_features_shorten = [shorten_name(x) for x in list_features]

                fig.add_trace(build_heatmap(corr, list_features, list_features_shorten), row=1, col=i + 1)

        else:
            corr = compute_corr(df.drop(hidden_features, axis=1), compute_method)
            top_features = compute_top_correlations_features(corr=corr, max_features=max_features)
            corr = cluster_corr(corr.loc[top_features, top_features], degree=degree)
            list_features = list(corr.columns)
            list_features_shorten = [shorten_name(x) for x in list_features]

            fig = go.Figure(build_heatmap(corr, list_features, list_features_shorten))

        title = f"Correlation ({compute_method})"
        # Mention in the subtitle that only a subset of the available features is displayed
        if len(list_features) < len(df.drop(hidden_features, axis=1).columns):
            subtitle = f"Top {len(list_features)} correlations"
            title += f"<span style='font-size: 12px;'><br />{subtitle}</span>"
        dict_t = copy.deepcopy(self._style_dict["dict_title"])
        dict_t["text"] = title

        fig.update_layout(
            coloraxis=dict(colorscale=["rgb(255, 255, 255)"] + self._style_dict["init_contrib_colorscale"][5:-1]),
            showlegend=True,
            title=dict_t,
            width=width,
            height=height,
        )

        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)

        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

    def plot_amplitude_vs_stability(self, mean_variability, mean_amplitude, column_names, file_name, auto_open):
        """
        Intermediate function used to display the stability plot when plot_type is "none":
        one marker per feature, variability on the X-axis and amplitude on the Y-axis.

        Parameters
        ----------
        mean_variability: array
            Local stability expressed as a mean value for all instances (one value per feature).
            Displayed on the X-axis on the plot.
        mean_amplitude: array
            Average of the normalized SHAP values in the neighborhood. Displayed on the Y-axis on the plot
            and used to color the markers.
        column_names: list
            Columns names that are displayed on the plot
        file_name: string
            Specify the save path of html files. If it is not provided, no file will be saved
        auto_open: bool
            open automatically the plot

        Returns
        -------
        go.Figure
        """
        x_label = (
            "Variability of the Normalized Local Contribution Values"
            "<span style='font-size: 12px;'><br />(standard deviation / mean)</span>"
        )
        y_label = "Importance<span style='font-size: 12px;'><br />(Average contributions)</span>"

        colorscale, _, _ = self.tuning_colorscale(pd.DataFrame(mean_amplitude))

        # One hover label per feature
        hover_labels = []
        for feature, variability, amplitude in zip(column_names, mean_variability, mean_amplitude):
            hover_labels.append(
                f"<b>Feature: {feature}</b><br />Importance: {amplitude}<br />Variability: {variability}"
            )

        marker_style = {
            "color": mean_amplitude,
            "size": 10,
            "opacity": 0.8,
            "line": {"width": 0.8, "color": "white"},
            "colorscale": colorscale,
        }

        fig = go.Figure()
        fig.add_scatter(
            x=mean_variability,
            y=mean_amplitude,
            showlegend=False,
            mode="markers",
            marker=marker_style,
            hovertext=hover_labels,
            hovertemplate="%{hovertext}<extra></extra>",
        )

        # The X range always reaches down to the 0.15 threshold line, with a small margin on both sides
        x_lower = np.min(np.append(mean_variability, [0.15])) - 0.03
        x_upper = np.max(mean_variability) + 0.03
        fig.update_xaxes(range=[x_lower, x_upper])

        self._update_stability_fig(
            fig=fig,
            x_barlen=len(mean_amplitude),
            y_bar=[0, mean_amplitude.max()],
            xaxis_title=x_label,
            yaxis_title=y_label,
            file_name=file_name,
            auto_open=auto_open,
        )
        return fig

    def plot_stability_distribution(
        self, variability, plot_type, mean_amplitude, dataset, column_names, file_name, auto_open
    ):
        """
        Intermediate function used to display the stability plot when plot_type is "boxplot" or
        "violin": one distribution per feature, colored according to the feature amplitude.

        Parameters
        ----------
        variability: array
            Local stability expressed as a distribution across all instances
            (one distribution per feature). Displayed on the X-axis on the plot
        plot_type: string
            Defines the type of plot that will be displayed.
            Possible values are "boxplot" or "violin"; any other value adds no distribution trace.
        mean_amplitude: array
            Average of the normalized SHAP values in the neighborhood.
            Displayed as a colorscale in the plot.
        dataset: DataFrame
            x_init dataset; only its number of columns is used here
        column_names: array
            Columns names that are displayed on the plot
        file_name: string
            Specify the save path of html files. If it is not provided, no file will be saved
        auto_open: bool
            open automatically the plot

        Returns
        -------
        go.Figure or None
            None when dataset has 500 columns or more (nothing is plotted in that case).
        """
        # One column of variability values per feature, ordered by increasing mean amplitude
        var_df = pd.DataFrame(variability, columns=column_names)
        normalized_amplitude = pd.Series(mean_amplitude, index=column_names) / mean_amplitude.max()
        var_df = var_df[column_names[mean_amplitude.argsort()]]

        # Color of each feature: first step of the colorscale that reaches its normalized amplitude
        col_scale, _, _ = self.tuning_colorscale(pd.DataFrame(mean_amplitude))
        color_list = []
        for level in sorted(normalized_amplitude.tolist()):
            color_list.append(next(pair[1] for pair in col_scale if level <= pair[0]))

        nb_columns = dataset.shape[1]
        height_per_column = 40 if nb_columns < 100 else 13
        height_value = max(500, height_per_column * nb_columns)

        xaxis_title = "Normalized local contribution value variability"
        yaxis_title = ""

        if nb_columns >= 500:
            return None

        # Plot the distribution
        fig = go.Figure()
        for position, feature in enumerate(var_df):
            if plot_type == "boxplot":
                trace = go.Box(
                    x=var_df[feature],
                    marker_color=color_list[position],
                    name=feature,
                    showlegend=False,
                )
            elif plot_type == "violin":
                trace = go.Violin(
                    x=var_df[feature],
                    line_color=color_list[position],
                    name=feature,
                    showlegend=False,
                )
            else:
                continue
            fig.add_trace(trace)

        # Dummy invisible plot to add the color scale
        colorbar_settings = {
            "thickness": 20,
            "lenmode": "pixels",
            "len": 300,
            "yanchor": "top",
            "y": 1,
            "ypad": 60,
            "title": "Importance<br />(Average contributions)",
        }
        fig.add_trace(
            go.Scatter(
                x=[None],
                y=[None],
                mode="markers",
                marker={
                    "size": 1,
                    "color": [mean_amplitude.min(), mean_amplitude.max()],
                    "colorscale": col_scale,
                    "colorbar": colorbar_settings,
                    "showscale": True,
                },
                hoverinfo="none",
                showlegend=False,
            )
        )

        fig.update_layout(height=height_value)

        self._update_stability_fig(
            fig=fig,
            x_barlen=len(mean_amplitude),
            y_bar=column_names,
            xaxis_title=xaxis_title,
            yaxis_title=yaxis_title,
            file_name=file_name,
            auto_open=auto_open,
        )

        return fig

    def _update_stability_fig(self, fig, x_barlen, y_bar, xaxis_title, yaxis_title, file_name, auto_open):
        """
        Function used for the `plot_stability_distribution` and `plot_amplitude_vs_stability`
        to update the layout of the plotly figure.

        Two dotted reference lines are added at x=0.15 ("<-- Stable") and x=0.3 ("--> Unstable"),
        then the titles and layout options are applied. The figure is modified in place.

        Parameters
        ----------
        fig: plotly.graph_objs._figure.Figure
            Plotly figure to update
        x_barlen: int
            draw a line --> len of x array
        y_bar: list
            draw a line --> y values
        xaxis_title: str
            Title of xaxis
        yaxis_title: str
            Title of yaxis
        file_name: string (optional)
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool (default=False)
            open automatically the plot

        Returns
        -------
        go.Figure
            The figure received in `fig`, once updated.
        """
        title = "Importance & Local Stability of explanations:"
        title += "<span style='font-size: 16px;'><br />How similar are explanations for closeby neighbours?</span>"

        # Style dictionaries are copied so that the shared style definition is never altered
        dict_t = copy.deepcopy(self._style_dict["dict_title_stability"])
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_stability_bar_colors = copy.deepcopy(self._style_dict["dict_stability_bar_colors"])
        dict_t["text"] = title
        dict_xaxis["text"] = xaxis_title
        dict_yaxis["text"] = yaxis_title

        # Reference lines: (x position, color, legend name)
        threshold_lines = (
            (0.15, dict_stability_bar_colors[0], "<-- Stable"),
            (0.3, dict_stability_bar_colors[1], "--> Unstable"),
        )
        for x_position, color, name in threshold_lines:
            fig.add_trace(
                go.Scatter(
                    x=[x_position] * x_barlen,
                    y=y_bar,
                    mode="lines",
                    hoverinfo="none",
                    line={"color": color, "dash": "dot"},
                    name=name,
                )
            )

        fig.update_layout(
            template="none",
            title=dict_t,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            coloraxis_showscale=False,
            hovermode="closest",
        )

        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)

        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)

        # Returned as documented; callers that ignore the result are unaffected
        return fig

[docs]    def local_neighbors_plot(self, index, max_features=10, file_name=None, auto_open=False):
        """
        The Local_neighbors_plot has the main objective of increasing confidence \
        in interpreting the contribution values of a selected instance.
        This plot analyzes the local neighborhood of the instance, \
        and compares its contribution values with those of its neighbors.
        Intuitively, for similar instances, we would expect similar contributions.
        Those neighbors are selected as follows :
        * We select top N neighbors for each instance (using L1 norm + variance normalization)
        * We discard neighbors whose model output is too **different** (see equations below)
        from the instance output
        * We discard additional neighbors if their distance to the instance \
        is bigger than a predefined value (to remove outliers)
        In this neighborhood, we would expect instances to have similar SHAP values. \
        If not, one might need to be cautious when interpreting SHAP values.
        The **difference** between outputs is measured with the following distance definition :
        * For regression:
        .. math::
            distance = \\frac{|output_{allFeatures} -
                              output_{currentFeatures}|}{|output_{allFeatures}|}
        * For classification:
        .. math::
            distance = |output_{allFeatures} - output_{currentFeatures}|
        Parameters
        ----------
        index: int
            Contains index row of the input DataFrame that we use to display contribution values in the neighborhood
        max_features: int, optional
            Maximum number of displayed features, by default 10
        file_name: string, optional
            Specify the save path of html files. If it is not provided, no file will be saved, by default None
        auto_open: bool, optional
            open automatically the plot, by default False
        Returns
        -------
        fig
            The figure that will be displayed
        """
        assert index in self.explainer.x_init.index, "index must exist in pandas dataframe"

        self.explainer.compute_features_stability([index])

        column_names = np.array([self.explainer.features_dict.get(x) for x in self.explainer.x_init.columns])

        def ordinal(n):
            return "%d%s" % (n, "tsnrhtdd"[(math.floor(n / 10) % 10 != 1) * (n % 10 < 4) * n % 10 :: 4])

        # Compute explanations for instance and neighbors
        g = self.explainer.local_neighbors["norm_shap"]

        # Reorder indices based on absolute values of the 1st row (i.e. the instance) in descending order
        inds = np.flip(np.abs(g[0, :]).argsort())
        g = g[:, inds]
        columns = [column_names[i] for i in inds]

        # Plot
        g_df = pd.DataFrame(g, columns=columns).T.rename(
            columns={
                **{0: "instance", 1: "closest neighbor"},
                **{i: ordinal(i) + " closest neighbor" for i in range(2, len(g))},
            }
        )

        # Keep only max_features
        if max_features is not None:
            g_df = g_df[:max_features]

        fig = go.Figure(
            data=[
                go.Bar(
                    name=g_df.iloc[::-1, ::-1].columns[i],
                    y=g_df.iloc[::-1, ::-1].index.tolist(),
                    x=g_df.iloc[::-1, ::-1].iloc[:, i],
                    marker_color=(
                        self._style_dict["dict_stability_bar_colors"][1]
                        if i == g_df.shape[1] - 1
                        else self._style_dict["dict_stability_bar_colors"][0]
                    ),
                    orientation="h",
                    opacity=np.clip(0.2 + i * (1 - 0.2) / (g_df.shape[1] - 1), 0.2, 1) if g_df.shape[1] > 1 else 1,
                )
                for i in range(g_df.shape[1])
            ]
        )

        title = f"Comparing local explanations in a neighborhood - Id: <b>{index}</b>"
        title += "<span style='font-size: 16px;'><br />How similar are explanations for closeby neighbours?</span>"
        dict_t = copy.deepcopy(self._style_dict["dict_title_stability"])
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_xaxis["text"] = "Normalized contribution values"
        dict_yaxis["text"] = ""
        dict_t["text"] = title
        fig.update_layout(
            template="none",
            title=dict_t,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            hovermode="closest",
            barmode="group",
            height=max(500, 11 * g_df.shape[0] * g_df.shape[1]),
            legend={"traceorder": "reversed"},
            xaxis={"side": "bottom"},
        )

        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)

        if file_name is not None:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

[docs]    def stability_plot(
        self,
        selection=None,
        max_points=500,
        force=False,
        max_features=10,
        distribution="none",
        file_name=None,
        auto_open=False,
    ):
        """
        The Stability_plot has the main objective of increasing confidence in contribution values, \
        and helping determine if we can trust an explanation.
        The idea behind local stability is the following : if instances are very similar, \
        then one would expect the explanations to be similar as well.
        Therefore, locally stable explanations are an important factor that help \
        build trust around a particular explanation method.
        The generated graphs can take multiple forms, but they all analyze \
        the same two aspects: for each feature we look at Amplitude vs. Variability. \
        in order terms, how important the feature is on average vs. how the feature impact \
        changes in the instance neighborhood.
        The average importance of the feature is the average SHAP value of the feature acros all considered instances
        The neighborhood is defined as follows :
        * We select top N neighbors for each instance (using L1 norm + variance normalization)
        * We discard neighbors whose model output is too **different** (see equations below) from the instance output
        * We discard additional neighbors if their distance to the instance \
        is bigger than a predefined value (to remove outliers)
        The **difference** between outputs is measured with the following distance definition:
        * For regression:
        .. math::
            distance = \\frac{|output_{allFeatures} - output_{currentFeatures}|}{|output_{allFeatures}|}
        * For classification:
        .. math::
            distance = |output_{allFeatures} - output_{currentFeatures}|
        Parameters
        ----------
        selection: list
            Contains list of index, subset of the input DataFrame that we use for the compute of stability statistics
        max_points: int, optional
            Maximum number to plot in compacity plot, by default 500
        force: bool, optional
            force == True, force the compute of stability values, by default False
        distribution: str, optional
            Add distribution of variability for each feature, by default 'none'.
            The other values are 'boxplot' or 'violin' that specify the type of plot
        file_name: string, optional
            Specify the save path of html files. If it is not provided, no file will be saved, by default None
        auto_open: bool, optional
            open automatically the plot, by default False
        Returns
        -------
        If single instance:
            * plot -- Normalized contribution values of instance and neighbors
        If multiple instances:
            * if distribution == "none": Mean amplitude of each feature contribution vs. mean variability across neighbors
            * if distribution == "boxplot": Distribution of contributions of each feature in instances neighborhoods.
            Graph type is box plot
            * if distribution == "violin": Distribution of contributions of each feature in instances neighborhoods.
            Graph type is violin plot
        """
        # Sampling
        if selection is None:
            if self.explainer.x_init.shape[0] <= max_points:
                list_ind = self.explainer.x_init.index.tolist()
            else:
                list_ind = random.sample(self.explainer.x_init.index.tolist(), max_points)
            # By default, don't compute calculation if it has already been done
            if (self.explainer.features_stability is None) or self.last_stability_selection or force:
                self.explainer.compute_features_stability(list_ind)
            else:
                print("Computed values from previous call are used")
            self.last_stability_selection = False
        elif isinstance(selection, list):
            if len(selection) == 1:
                raise ValueError("Selection must include multiple points")
            if len(selection) > max_points:
                print(
                    f"Size of selection is bigger than max_points (default: {max_points}).\
                      Computation time might be affected"
                )
            self.explainer.compute_features_stability(selection)
            self.last_stability_selection = True
        else:
            raise ValueError("Parameter selection must be a list")

        column_names = np.array([self.explainer.features_dict.get(x) for x in self.explainer.x_init.columns])

        variability = self.explainer.features_stability["variability"]
        amplitude = self.explainer.features_stability["amplitude"]

        mean_variability = variability.mean(axis=0)
        mean_amplitude = amplitude.mean(axis=0)

        # Plot 1 : only show average variability on y-axis
        if distribution not in ["boxplot", "violin"]:
            fig = self.plot_amplitude_vs_stability(mean_variability, mean_amplitude, column_names, file_name, auto_open)

        # Plot 2 : Show distribution of variability
        else:

            # If set, only keep features with the highest mean amplitude
            if max_features is not None:
                keep = mean_amplitude.argsort()[::-1][:max_features]
                keep = np.sort(keep)

                variability = variability[:, keep]
                mean_amplitude = mean_amplitude[keep]
                dataset = self.explainer.x_init.iloc[:, keep]
                column_names = column_names[keep]

            fig = self.plot_stability_distribution(
                variability, distribution, mean_amplitude, dataset, column_names, file_name, auto_open
            )

        return fig

[docs]    def compacity_plot(
        self, selection=None, max_points=2000, force=False, approx=0.9, nb_features=5, file_name=None, auto_open=False
    ):
        """
        The Compacity_plot has the main objective of determining if a small subset of features \
        can be extracted to provide a simpler explanation of the model. \
        indeed, having too many features might negatively affect the model explainability and make it harder to undersand.
        The following two plots are proposed:
        * We identify the minimum number of required features (based on the top contribution values) \
        that well approximate the model, and thus, provide accurate explanations.
        In particular, the prediction with the chosen subset needs to be close enough (*see distance definition below*) \
        to the one obtained with all features.
        * Conversely, we determine how close we get to the output with all features by using only a subset of them.
        *Distance definition*
        * For regression:
        .. math::
            distance = \\frac{|output_{allFeatures} - output_{currentFeatures}|}{|output_{allFeatures}|}
        * For classification:
        .. math::
            distance = |output_{allFeatures} - output_{currentFeatures}|
        Parameters
        ----------
        selection: list
            Contains list of index, subset of the input DataFrame that we use for the compute of stability statistics
        max_points: int, optional
            Maximum number to plot in compacity plot, by default 2000
        force: bool, optional
            force == True, force the compute of stability values, by default False
        approx: float, optional
            How close we want to be from model with all features, by default 0.9 (=90%)
        nb_features: int, optional
            Number of features used, by default 5
        file_name: string, optional
            Specify the save path of html files. If it is not provided, no file will be saved, by default None
        auto_open: bool, optional
            open automatically the plot, by default False
        """
        # Sampling
        if selection is None:
            if self.explainer.x_init.shape[0] <= max_points:
                list_ind = self.explainer.x_init.index.tolist()
            else:
                list_ind = random.sample(self.explainer.x_init.index.tolist(), max_points)
            # By default, don't compute calculation if it has already been done
            if (self.explainer.features_compacity is None) or self.last_compacity_selection or force:
                self.explainer.compute_features_compacity(list_ind, 1 - approx, nb_features)
            else:
                print("Computed values from previous call are used")
            self.last_compacity_selection = False
        elif isinstance(selection, list):
            if len(selection) > max_points:
                print(
                    f"Size of selection is bigger than max_points (default: {max_points}).\
                      Computation time might be affected"
                )
            self.explainer.compute_features_compacity(selection, 1 - approx, nb_features)
            self.last_compacity_selection = True
        else:
            raise ValueError("Parameter selection must be a list")

        features_needed = self.explainer.features_compacity["features_needed"]
        distance_reached = self.explainer.features_compacity["distance_reached"]

        # Make plots
        fig = make_subplots(
            rows=1,
            cols=2,
            subplot_titles=[
                "Number of features required<br>to explain " + str(round(100 * approx)) + "% of the model's output",
                "Percentage of the model output<br>explained by the "
                + str(nb_features)
                + " most important<br>features per instance",
            ],
            horizontal_spacing=0.2,
        )

        # Used as titles in make_subplots are considered annotations
        fig.update_annotations(font=self._style_dict["dict_title_compacity"]["font"])

        # First plot: number of features required for a given approximation
        fig.add_trace(
            go.Histogram(
                x=features_needed,
                histnorm="percent",
                cumulative={"enabled": True},
                name="",
                hovertemplate="Top %{x:.0f} features explain at least "
                + str(round(100 * approx))
                + "%<br>of the model for %{y:.1f}% of the instances",
                hovertext="none",
                marker_color=self._style_dict["dict_compacity_bar_colors"][1],
            ),
            row=1,
            col=1,
        )

        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_xaxis["text"] = "Number of selected features"
        dict_yaxis["text"] = "Cumulative distribution over<br>dataset's instances (%)"

        fig.update_xaxes(title=dict_xaxis, row=1, col=1)
        fig.update_yaxes(title=dict_yaxis, row=1, col=1)

        # Second plot: approximation reached for a given number of features
        fig.add_trace(
            go.Histogram(
                x=100 * (1 - distance_reached),
                histnorm="percent",
                cumulative={"enabled": True, "direction": "decreasing"},
                name="",
                hovertemplate="Top "
                + str(nb_features)
                + " features explain at least "
                + "%{x:.0f}"
                + "%<br>of the model for %{y:.1f}% of the instances",
                marker_color=self._style_dict["dict_compacity_bar_colors"][0],
            ),
            row=1,
            col=2,
        )

        dict_xaxis2 = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis2 = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_xaxis2["text"] = "Percentage of model output<br>explained (%)"
        dict_yaxis2["text"] = "Cumulative distribution over<br>dataset's instances (%)"

        fig.update_xaxes(title=dict_xaxis2, row=1, col=2)
        fig.update_yaxes(title=dict_yaxis2, row=1, col=2)

        title = "Compacity of explanations:"
        title += "<span style='font-size: 16px;'><br />How many variables are enough to produce accurate explanations?</span>"
        dict_t = copy.deepcopy(self._style_dict["dict_title_stability"])
        dict_t["text"] = title

        fig.update_layout(
            template="none",
            title=dict_t,
            title_y=0.8,
            hovermode="closest",
            margin={"t": 150},
            showlegend=False,
        )

        if file_name is not None:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

    def scatter_plot_prediction(
        self,
        selection=None,
        label=-1,
        max_points=2000,
        width=900,
        height=600,
        file_name=None,
        auto_open=False,
    ):
        """
        scatter_plot_prediction displays a Plotly scatter or violin plot of predictions in comparison to the target variable.
        This plot represents Trues Values versus Predicted Values.

        This plot allows the user to understand the distribution of predictions in comparison to the target variable.
        With the web app, it is possible to select the wrong or correct predictions or a subset of predictions.

        Parameters
        ----------
        selection: list (optional)
            Contains list of index, subset of the input DataFrame that we want to plot
        label: integer or string (default -1)
            If the label is of string type, check if it can be changed to integer to select the
            good dataframe object.
        max_points: int (optional, default: 2000)
            maximum number to plot in contribution plot. if input dataset is bigger than max_points,
            a sample limits the number of points to plot.
            nb: you can also limit the number using 'selection' parameter.
        width : Int (default: 900)
            Plotly figure - layout width
        height : Int (default: 600)
            Plotly figure - layout height
        file_name: string (optional)
            Specify the save path of html files. If it is not provided, no file will be saved.
        auto_open: bool (default=False)
            open automatically the plot

        Returns
        -------
        fig
            The prediction figure, or an empty figure carrying an explanatory annotation
            when the explainer has no y_target.

        Raises
        ------
        ValueError
            If the explainer case is neither "classification" nor "regression".
        """
        # Without target values there is nothing to compare predictions to:
        # return a placeholder figure telling the user what is missing.
        if self.explainer.y_target is None:
            fig = go.Figure()
            fig.update_layout(
                xaxis={"visible": False},
                yaxis={"visible": False},
                annotations=[
                    {
                        "text": "Provide the y_target argument in the compile() method to display this plot.",
                        "xref": "paper",
                        "yref": "paper",
                        "showarrow": False,
                        "font": {"size": 14},
                    }
                ],
            )
            return fig

        # Sampling
        list_ind, addnote = self.explainer.plot._subset_sampling(selection, max_points)

        if self.explainer._case == "classification":
            fig, subtitle = self.explainer.plot._prediction_classification_plot(list_ind, label)
        elif self.explainer._case == "regression":
            fig, subtitle = self.explainer.plot._prediction_regression_plot(list_ind)
        else:
            raise ValueError(f"Unsupported case for the prediction plot: {self.explainer._case!r}")

        # Add traces, title and template
        title = "True Values Vs Predicted Values"
        # Both the subtitle and the sampling note are optional (either may be None):
        # only the ones actually available are appended under the title.
        title_notes = [note for note in (subtitle, addnote) if note]
        if title_notes:
            title += "<br><sup>" + " - ".join(title_notes) + "</sup>"
        dict_t = copy.deepcopy(self._style_dict["dict_title"])
        dict_xaxis = copy.deepcopy(self._style_dict["dict_xaxis"])
        dict_yaxis = copy.deepcopy(self._style_dict["dict_yaxis"])
        dict_t["text"] = title
        dict_xaxis["text"] = truncate_str("True Values", 110)
        dict_yaxis["text"] = "Predicted Values"

        fig.update_traces(marker={"size": 10, "opacity": 0.8, "line": {"width": 0.8, "color": "white"}})

        fig.update_layout(
            template="none",
            title=dict_t,
            width=width,
            height=height,
            xaxis_title=dict_xaxis,
            yaxis_title=dict_yaxis,
            hovermode="closest",
        )

        fig.update_yaxes(automargin=True)
        fig.update_xaxes(automargin=True)
        if file_name:
            plot(fig, filename=file_name, auto_open=auto_open)

        return fig

    def _prediction_classification_plot(
        self,
        list_ind,
        label=-1,
    ):
        """
        _prediction_classification_plot displays a Plotly violin plot of predictions in comparison to the target variable.
        This plot represents Trues Values versus Predicted Values.

        This plot allows the user to understand the distribution of predictions in comparison to the target variable.
        With the web app, it is possible to select the wrong or correct predictions or a subset of predictions.

        Parameters
        ----------
        list_ind: list
            Contains list of index that we want to plot
        label: integer or string (default -1)
            If the label is of string type, check if it can be changed to integer to select the
            good dataframe object.

        Returns
        -------
        tuple
            (fig, subtitle): the Plotly figure and the subtitle naming the plotted response.

        Raises
        ------
        ValueError
            If the explainer has no proba_values: the plot is built from predicted probabilities.
        """
        fig = go.Figure()

        label_num, _, label_value = self.explainer.check_label_name(label)

        # Every trace below is built from the predicted probabilities: fail with an
        # explicit message rather than with a NameError further down.
        if self.explainer.proba_values is None:
            raise ValueError("proba_values are required to display the prediction plot in classification case.")

        # Assign proba values of the target
        df_proba_target = self.explainer.proba_values.copy()
        df_proba_target["proba_target"] = df_proba_target.iloc[:, label_num]
        proba_values = df_proba_target[["proba_target"]]
        # Proba subset:
        proba_values = proba_values.loc[list_ind, :]
        target = self.explainer.y_target.loc[list_ind, :]
        y_pred = self.explainer.y_pred.loc[list_ind, :]
        # Columns are glued positionally; the index is carried by the first column, then restored
        df_pred = pd.concat(
            [proba_values.reset_index(), y_pred.reset_index(drop=True), target.reset_index(drop=True)], axis=1
        )
        df_pred.set_index(df_pred.columns[0], inplace=True)
        df_pred.columns = ["proba_values", "predict_class", "target"]
        df_pred["wrong_predict"] = 1
        df_pred.loc[(df_pred["predict_class"] == df_pred["target"]), "wrong_predict"] = 0
        subtitle = f"Response: <b>{label_value}</b>"

        # Plot distribution
        fig.add_trace(
            go.Violin(
                x=df_pred["target"].values.flatten(),
                y=df_pred["proba_values"].values.flatten(),
                points=False,
                legendgroup="M",
                scalegroup="M",
                name="Correct Prediction",
                line_color=self._style_dict["violin_area_classif"][1],
                pointpos=-0.1,
                showlegend=False,
                jitter=0.075,
                meanline_visible=True,
                spanmode="hard",
                customdata=df_pred["proba_values"].index.values,
                scalemode="count",
            )
        )

        def _hover_texts(frame):
            # One hover label per row of the given subset of df_pred
            return [
                f"Id: {x}<br />Predicted Values: {y:.3f}<br />Predicted class: {w}<br />True Values: {z}<br />"
                for x, y, w, z in zip(
                    frame.index,
                    frame.proba_values.values.round(3).flatten(),
                    frame.predict_class.values.flatten(),
                    frame.target.values.flatten(),
                )
            ]

        # Plot points depending if wrong or correct prediction.
        # The style palette is indexed by the wrong_predict flag: [1] for correct, [0] for wrong.
        point_groups = [
            (df_pred[(df_pred["wrong_predict"] == 0)], "Correct Prediction", self._style_dict["prediction_plot"][1]),
            (df_pred[(df_pred["wrong_predict"] == 1)], "Wrong Prediction", self._style_dict["prediction_plot"][0]),
        ]
        for df_group, group_name, group_color in point_groups:
            fig.add_trace(
                go.Scatter(
                    # Small horizontal jitter so that points of a same class do not overlap
                    x=df_group["target"].values.flatten() + np.random.normal(0, 0.02, len(df_group)),
                    y=df_group["proba_values"].values.flatten(),
                    mode="markers",
                    marker_color=group_color,
                    showlegend=True,
                    name=group_name,
                    hovertext=_hover_texts(df_group),
                    hovertemplate="<b>%{hovertext}</b><br />",
                    customdata=df_group["proba_values"].index.values,
                )
            )

        fig.update_layout(violingap=0, violinmode="overlay")
        tick_values = list(df_pred["target"].unique())
        if self.explainer.label_dict is not None:
            # Build one label per tick value so that both lists always stay aligned,
            # even if two classes share the same label.
            fig.update_xaxes(
                tickmode="array",
                tickvals=tick_values,
                ticktext=[self.explainer.label_dict[value] for value in tick_values],
            )
        else:
            fig.update_xaxes(tickvals=sorted(tick_values))

        return fig, subtitle

    def _prediction_regression_plot(
        self,
        list_ind,
    ):
        """
        _prediction_regression_plot displays a Plotly scatter plot of predictions in comparison to the target variable.
        This plot represents Trues Values versus Predicted Values.

        This plot allows the user to understand the distribution of predictions in comparison to the target variable.
        With the web app, it is possible to select the wrong or correct predictions or a subset of predictions.

        The figure is made of a density curve of the true values, one marker per instance colored by
        its prediction error bin, and a dotted diagonal line.

        Parameters
        ----------
        list_ind: list
            Contains list of index that we want to plot

        Returns
        -------
        tuple
            (fig, subtitle). When the explainer has no prediction_error, fig is an empty figure
            and subtitle is None.
        """
        fig = go.Figure()

        subtitle = None
        prediction_error = self.explainer.prediction_error
        if prediction_error is not None:
            # The subtitle only describes the error formula; the error itself is read from the explainer.
            # NOTE(review): presumably mirrors how prediction_error is computed upstream
            # (absolute error when a target equals 0, relative error otherwise) - confirm.
            if (self.explainer.y_target == 0).any().iloc[0]:
                subtitle = "Prediction Error = abs(True Values - Predicted Values)"
            else:
                subtitle = "Prediction Error = abs(True Values - Predicted Values) / True Values"
            # Bin edges = min, deciles and max of the prediction error. They are computed on the whole
            # prediction_error, before it is restricted to the plotted indices further down.
            df_equal_bins = prediction_error.describe(percentiles=np.arange(0.1, 1, 0.1).tolist())
            equal_bins = df_equal_bins.loc[~df_equal_bins.index.isin(["count", "mean", "std"])].values
            # np.unique flattens, sorts and removes duplicated edges
            equal_bins = np.unique(equal_bins)
            bins_list = [i for i in equal_bins]
            # Bin number of every error value (labels=False returns integer codes)
            values = pd.DataFrame(pd.cut([val[0] for val in prediction_error.values], bins=bins_list, labels=False))
            # Only the first returned element (the color scale) is used here
            col_scale, _, _ = self.tuning_colorscale(values, keep_90_pct=False)

            y_target = self.explainer.y_target.loc[list_ind]
            # With more than 500 points, try to drop extreme true values so they do not stretch the axes.
            # A trimming is only accepted if it keeps more than 95% of the points; attempts are made in
            # this order: both tails, upper tail only, lower tail only. If none qualifies, nothing is dropped.
            # Note that an accepted trimming turns y_target from a DataFrame into a Series.
            if len(y_target) > 500:
                lower_quantile = y_target.iloc[:, 0].quantile(0.005)
                upper_quantile = y_target.iloc[:, 0].quantile(0.995)
                y_target_tmp = y_target.iloc[:, 0][
                    (y_target.iloc[:, 0] > lower_quantile) & (y_target.iloc[:, 0] < upper_quantile)
                ]
                if len(y_target_tmp) > 0.95 * len(y_target):
                    y_target = y_target_tmp
                else:
                    y_target_tmp = y_target.iloc[:, 0][(y_target.iloc[:, 0] < upper_quantile)]
                    if len(y_target_tmp) > 0.95 * len(y_target):
                        y_target = y_target_tmp
                    else:
                        y_target_tmp = y_target.iloc[:, 0][(y_target.iloc[:, 0] > lower_quantile)]
                        if len(y_target_tmp) > 0.95 * len(y_target):
                            y_target = y_target_tmp

            y_target_values = y_target.values.flatten()

            # Align predictions and errors on the (possibly trimmed) target index
            y_pred = self.explainer.y_pred.loc[y_target.index]
            prediction_error = np.array(prediction_error.loc[y_target.index])

            feature_values_array = y_target_values
            # The density curve is only drawn when there are more than two points
            if len(feature_values_array) > 2:
                y_pred_flatten = y_pred.values.flatten()
                y_pred_flatten_min = min(y_pred_flatten)

                # Height of the prediction range, used to scale the density curve
                h = max(y_pred_flatten) - y_pred_flatten_min

                feature_values_min, feature_values_max = min(feature_values_array), max(feature_values_array)
                val_inter = feature_values_max - feature_values_min
                from sklearn.neighbors import KernelDensity

                feature_np = np.array(feature_values_array)
                # NaN true values are ignored by the density estimation
                feature_np = feature_np[~np.isnan(feature_np)][:, None]
                # Bandwidth is 1/300 of the range of true values
                kde = KernelDensity(bandwidth=val_inter / 300, kernel="epanechnikov").fit(feature_np)
                xs = np.linspace(feature_values_min, feature_values_max, 1000)
                log_dens = kde.score_samples(xs[:, None])
                # Density rescaled so that its peak reaches a third of the prediction range,
                # and shifted so that its baseline sits on the smallest prediction
                y_upper = np.exp(log_dens) * h / (np.max(np.exp(log_dens)) * 3) + y_pred_flatten_min
                y_lower = np.full_like(y_upper, y_pred_flatten_min)

                # Create the density plot
                # (closed polygon: upper curve left to right, then baseline right to left)
                density_plot = go.Scatter(
                    x=np.concatenate([xs, xs[::-1]]),
                    y=np.concatenate([y_upper, y_lower[::-1]]),
                    fill="toself",
                    hoverinfo="none",
                    showlegend=False,
                    line={"color": self._style_dict["contrib_distribution"]},
                )
                # Add density plot
                fig.add_trace(density_plot)

            # round predict
            # NOTE(review): tuning_round_digit is expected to set self.round_digit - confirm.
            if self.round_digit is None:
                self.tuning_round_digit()
            y_pred = y_pred.map(lambda x: round(x, self.round_digit))
            y_pred_flatten = y_pred.values.flatten()

            hv_text = [
                f"Id: {x}<br />True Values: {y:,.2f}<br />Predicted Values: {z:,.2f}<br />Prediction Error: {w:,.2f}"
                for x, y, z, w in zip(
                    y_target.index, y_target_values, y_pred.values.flatten(), prediction_error.flatten()
                )
            ]

            fig.add_scatter(
                x=y_target_values,
                y=y_pred_flatten,
                mode="markers",
                hovertext=hv_text,
                hovertemplate="<b>%{hovertext}</b><br />",
                customdata=y_pred.index.values,
                showlegend=False,
            )

            # Color of each marker: bin number of its error divided by 10.
            # NOTE(review): presumably maps the bin numbers onto the positions of col_scale - confirm.
            colorpoints = pd.cut([val[0] for val in prediction_error], bins=bins_list, labels=False) / 10
            colorbar_title = "Prediction Error"
            # The scatter trace was added last, hence fig.data[-1]
            fig.data[-1].marker.color = colorpoints.flatten()
            fig.data[-1].marker.coloraxis = "coloraxis"
            fig.layout.coloraxis.colorscale = col_scale
            # Colorbar ticks are labelled with the smallest and the second largest bin edge
            fig.layout.coloraxis.colorbar = {
                "title": {"text": colorbar_title},
                "tickvals": [col_scale[0][0], col_scale[-1][0] - 0.15],
                "ticktext": [float("{:0.3f}".format(equal_bins[0])), float("{:0.3f}".format(equal_bins[-2]))],
                "tickformat": ".2s",
                "yanchor": "top",
                "y": 1.1,
            }
            # Same range on both axes, so that the diagonal below is the line y = x
            range_axis = [
                min(min(y_target_values), min(y_pred_flatten)),
                max(max(y_target_values), max(y_pred_flatten)),
            ]
            fig.update_xaxes(range=range_axis)
            fig.update_yaxes(range=range_axis)
            # Dotted diagonal drawn corner to corner of the plotting area
            fig.update_layout(
                shapes=[
                    {
                        "type": "line",
                        "yref": "y domain",
                        "xref": "x domain",
                        "y0": 0,
                        "y1": 1,
                        "x0": 0,
                        "x1": 1,
                        "line": dict(color="grey", width=1, dash="dot"),
                    }
                ]
            )

        return fig, subtitle

    def _subset_sampling(self, selection=None, max_points=2000, col=None, col_value_count=0):
        """
        Pick the indices to plot and build the note describing the sampling, if any.

        The global random generator is re-seeded with a fixed value on every call,
        so that a given input always leads to the same sample.

        Parameters
        ----------
        selection : list, optional
            A list of indices specifying a subset of the DataFrame for plotting.
        max_points : int, optional
            The maximum number of points to plot. Defaults to 2000.
        col : str, optional
            The column name based on which intelligent sampling is performed.
        col_value_count : int, optional
            The count of unique values in the specified column. Used for determining sampling strategy.

        Returns
        -------
        tuple
            (indices, note): the selected indices and the formatted note,
            or None as note when the sampling strategy did not produce one.
        """
        seed = 79
        random.seed(seed)

        indices, note = self._determine_sampling_strategy(selection, max_points, col, col_value_count, seed)

        # No note means nothing to format
        if note is None:
            return indices, None
        return indices, self._format_additional_note(indices, note)

    def _determine_sampling_strategy(self, selection, max_points, col, col_value_count, random_seed):
        """
        Dispatch to the sampling routine matching the kind of selection received.

        No selection goes to `_no_selection_sampling`, a list goes to
        `_list_selection_sampling`; anything else raises a ValueError.
        """
        if selection is None:
            return self._no_selection_sampling(max_points, col, col_value_count, random_seed)

        if not isinstance(selection, list):
            raise ValueError("Parameter 'selection' must be a list.")

        return self._list_selection_sampling(selection, max_points, col, col_value_count, random_seed)

    def _no_selection_sampling(self, max_points, col, col_value_count, random_seed):
        """
        Choose the indices to plot when the user gave no selection.

        Returns a tuple (indices, note prefix). The whole dataset is returned, without note,
        when it holds at most max_points rows. Otherwise the rows are sampled: at random
        when no column is given, with `_intelligent_sampling` on that column otherwise.
        """
        dataset = self.explainer.x_init

        if dataset.shape[0] > max_points:
            if col is not None:
                sampled = self._intelligent_sampling(dataset, max_points, col, col_value_count, random_seed)
                return sampled, "Length of smart Subset: "
            sampled = random.sample(dataset.index.tolist(), max_points)
            return sampled, "Length of random Subset: "

        # Small enough: keep every row
        return dataset.index.tolist(), None

    def _list_selection_sampling(self, selection, max_points, col, col_value_count, random_seed):
        """
        Handles sampling when a specific list of indices is provided.
        """
        if len(selection) <= max_points:
            return selection, "Length of user-defined Subset: "
        elif col is None:
            selected_indices = random.sample(selection, max_points)
            return selected_indices, "Length of random Subset: "
        else:
            subset = self.explainer.x_init.loc[selection]
            selected_indices = self._intelligent_sampling(subset, max_points, col, col_value_count, random_seed)
            return selected_indices, "Length of smart Subset: "

    def _intelligent_sampling(self, data, max_points, col, col_value_count, random_seed):
        """
        Performs intelligent sampling based on the distribution of values in the specified column.
        """
        rng = np.random.default_rng(seed=random_seed)
        is_col_str = True
        if data[col].dtype.kind in "fc":
            try:
                if data[col].str.isnumeric().all():
                    is_col_str = False
            except AttributeError:
                is_col_str = False

        if (col_value_count < len(data[col]) / 20) or is_col_str:
            cluster_labels = data[col]
            cluster_counts = cluster_labels.value_counts()
        else:
            n_clusters = min(100, len(data[col]) // 20)
            kmeans = KMeans(n_clusters=n_clusters, random_state=random_seed, n_init="auto")
            cluster_labels = pd.Series(kmeans.fit_predict(data[col].values.reshape(-1, 1)))
            cluster_counts = cluster_labels.value_counts()

        weights = cluster_counts.apply(lambda x: (x ** 0.5) / x).to_dict()
        selection_weights = cluster_labels.apply(lambda x: weights[x])
        selection_weights /= selection_weights.sum()
        selected_indices = rng.choice(data.index.tolist(), max_points, p=selection_weights, replace=False)
        return selected_indices

    def _format_additional_note(self, selected_indices, additional_note):
        """
        Formats the additional note with the length and percentage of the selected subset.
        """
        percentage = int(np.round(100 * len(selected_indices) / self.explainer.x_init.shape[0]))
        return f"{additional_note}{len(selected_indices)} ({percentage}%)"