1
- # Functions required only for handling exact cases
1
+ # Functions required only for handling (partly) exact cases
2
2
3
3
# Provides fixed input for the exact case:
4
4
# - Z: Matrix with all 2^p-2 on-off vectors z
5
5
# - w: Vector with row weights of Z ensuring that the distribution of sum(z) matches
6
6
# the SHAP kernel distribution
7
7
# - A: Exact matrix A = Z'wZ
8
- input_exact <- function (p ) {
9
- Z <- exact_Z(p )
8
+ input_exact <- function (p , feature_names ) {
9
+ Z <- exact_Z(p , feature_names = feature_names )
10
10
# Each Kernel weight(j) is divided by the number of vectors z having sum(z) = j
11
11
w <- kernel_weights(p ) / choose(p , 1 : (p - 1L ))
12
- list (Z = Z , w = w [rowSums(Z )], A = exact_A(p ))
12
+ list (Z = Z , w = w [rowSums(Z )], A = exact_A(p , feature_names = feature_names ))
13
13
}
14
14
15
- # Calculates exact A. Notice the difference to the off-diagnonals in the Supplement of
16
- # Covert and Lee (2021). Credits to David Watson for figuring out the correct formula,
17
- # see our discussions in https://github.com/ModelOriented/kernelshap/issues/22
18
- exact_A <- function (p ) {
15
+ # ' Exact Matrix A
16
+ # '
17
+ # ' Internal function that calculates exact A.
18
+ # ' Notice the difference to the off-diagnonals in the Supplement of
19
+ # ' Covert and Lee (2021). Credits to David Watson for figuring out the correct formula,
20
+ # ' see our discussions in https://github.com/ModelOriented/kernelshap/issues/22
21
+ # '
22
+ # ' @noRd
23
+ # ' @keywords internal
24
+ # '
25
+ # ' @param p Number of features.
26
+ # ' @param feature_names Feature names.
27
+ # ' @returns A (p x p) matrix.
28
+ exact_A <- function (p , feature_names ) {
19
29
S <- 1 : (p - 1L )
20
30
c_pr <- S * (S - 1 ) / p / (p - 1 )
21
31
off_diag <- sum(kernel_weights(p ) * c_pr )
22
- A <- matrix (off_diag , nrow = p , ncol = p )
32
+ A <- matrix (
33
+ off_diag , nrow = p , ncol = p , dimnames = list (feature_names , feature_names )
34
+ )
23
35
diag(A ) <- 0.5
24
36
A
25
37
}
26
38
27
- # Creates (2^p-2) x p matrix with all on-off vectors z of length p
28
- # Instead of calculating this object, we could evaluate it for different p <= p_max
29
- # and store it as a list in the package.
30
- exact_Z <- function (p ) {
39
+ # ' All on-off Vectors
40
+ # '
41
+ # ' Internal function that creates matrix of all on-off vectors of length `p`.
42
+ # '
43
+ # ' @noRd
44
+ # ' @keywords internal
45
+ # '
46
+ # ' @param p Number of features.
47
+ # ' @param feature_names Feature names.
48
+ # ' @returns An integer ((2^p - 2) x p) matrix of all on-off vectors of length `p`.
49
+ exact_Z <- function (p , feature_names ) {
31
50
Z <- as.matrix(do.call(expand.grid , replicate(p , 0 : 1 , simplify = FALSE )))
32
- dimnames (Z ) <- NULL
51
+ colnames (Z ) <- feature_names
33
52
Z [2 : (nrow(Z ) - 1L ), , drop = FALSE ]
34
53
}
35
54
36
55
# List all length p vectors z with sum(z) in {k, p - k}
37
- partly_exact_Z <- function (p , k ) {
56
+ partly_exact_Z <- function (p , k , feature_names ) {
38
57
if (k < 1L ) {
39
58
stop(" k must be at least 1" )
40
59
}
@@ -48,17 +67,18 @@ partly_exact_Z <- function(p, k) {
48
67
utils :: combn(seq_len(p ), k , FUN = function (z ) {x <- numeric (p ); x [z ] <- 1 ; x })
49
68
)
50
69
}
51
- if (p = = 2L * k ) {
52
- return ( Z )
70
+ if (p ! = 2L * k ) {
71
+ Z <- rbind( Z , 1 - Z )
53
72
}
54
- return (rbind(Z , 1 - Z ))
73
+ colnames(Z ) <- feature_names
74
+ Z
55
75
}
56
76
57
77
# Create Z, w, A for vectors z with sum(z) in {k, p-k} for k in {1, ..., deg}.
58
78
# The total weights do not sum to one, except in the special (exact) case deg=p-deg.
59
79
# (The remaining weight will be added via input_sampling(p, deg=deg)).
60
80
# Note that for a given k, the weights are constant.
61
- input_partly_exact <- function (p , deg ) {
81
+ input_partly_exact <- function (p , deg , feature_names ) {
62
82
if (deg < 1L ) {
63
83
stop(" deg must be at least 1" )
64
84
}
@@ -70,7 +90,7 @@ input_partly_exact <- function(p, deg) {
70
90
Z <- w <- vector(" list" , deg )
71
91
72
92
for (k in seq_len(deg )) {
73
- Z [[k ]] <- partly_exact_Z(p , k = k )
93
+ Z [[k ]] <- partly_exact_Z(p , k = k , feature_names = feature_names )
74
94
n <- nrow(Z [[k ]])
75
95
w_tot <- kw [k ] * (2 - (p == 2L * k ))
76
96
w [[k ]] <- rep(w_tot / n , n )
@@ -82,20 +102,21 @@ input_partly_exact <- function(p, deg) {
82
102
}
83
103
84
104
# Case p = 1 returns exact Shapley values
85
- case_p1 <- function (n , nms , v0 , v1 , X , verbose ) {
105
+ case_p1 <- function (n , feature_names , v0 , v1 , X , verbose ) {
86
106
txt <- " Exact Shapley values (p = 1)"
87
107
if (verbose ) {
88
108
message(txt )
89
109
}
90
- S <- v1 - v0 [rep(1L , n ), , drop = FALSE ]
91
- SE <- matrix (numeric (n ), dimnames = list (NULL , nms ) )
110
+ S <- v1 - v0 [rep(1L , n ), , drop = FALSE ] # (n x K)
111
+ SE <- matrix (numeric (n ), dimnames = list (NULL , feature_names )) # (n x 1 )
92
112
if (ncol(v1 ) > 1L ) {
93
113
SE <- replicate(ncol(v1 ), SE , simplify = FALSE )
94
114
S <- lapply(
95
- asplit(S , MARGIN = 2L ), function (M ) as.matrix(M , dimnames = list (NULL , nms ))
115
+ asplit(S , MARGIN = 2L ), function (M )
116
+ as.matrix(M , dimnames = list (NULL , feature_names ))
96
117
)
97
118
} else {
98
- colnames(S ) <- nms
119
+ colnames(S ) <- feature_names
99
120
}
100
121
out <- list (
101
122
S = S ,
0 commit comments