% Conference paper introducing the Shapley-Taylor interaction index.
% NOTE(review): the citation key suggests ICML 2020, but the booktitle below
% reads "Proceedings of Machine Learning and Systems 2020"; this looks like an
% export artifact. Confirm booktitle and pages against the official
% proceedings listing before relying on them.
@inproceedings{icml2020_625,
  author    = {Sundararajan, Mukund and Dhamdhere, Kedar and Agarwal, Ashish},
  title     = {The {Shapley} {Taylor} Interaction Index},
  booktitle = {Proceedings of Machine Learning and Systems 2020},
  pages     = {944--957},
  year      = {2020},
  abstract  = {The attribution problem, that is the problem of attributing a model's prediction to its base features, is well-studied. We extend the notion of attribution to also apply to feature interactions.
The Shapley value is a commonly used method to attribute a model's prediction to its base features. We propose a generalization of the Shapley value called Shapley-Taylor index that attributes the model's prediction to interactions of subsets of features up to some size $k$. The method is analogous to how the truncated Taylor Series decomposes the function value at a certain point using its derivatives at a different point. In fact, we show that the Shapley Taylor index is equal to the Taylor Series of the multilinear extension of the set-theoretic behavior of the model.
We axiomatize this method using the standard Shapley axioms---linearity, dummy, symmetry and efficiency---and an additional axiom that we call the interaction distribution axiom. This new axiom explicitly characterizes how interactions are distributed for a class of functions that model pure interaction.
We contrast the Shapley-Taylor index against the previously proposed Shapley Interaction index from the cooperative game theory literature. We also apply the Shapley Taylor index to three models and identify interesting qualitative insights.},
}