import numpy as np
from numpy.linalg import inv, multi_dot
from scipy.linalg import sqrtm
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.neighbors import kneighbors_graph
from kalelinear.utils._backend import to_numpy
[docs]
def lap_norm(X, n_neighbour=3, metric="cosine", mode="distance", normalise=True):
    """Build the Laplacian of a symmetrised k-nearest-neighbour graph.

    A k-NN graph is computed over the rows of ``X`` and symmetrised: two
    samples are connected when either one is listed as a neighbour of the
    other. The weighted adjacency matrix ``W`` of that graph is then turned
    into a graph Laplacian.

    Parameters
    ----------
    X : array-like
        Samples, one per row. Converted with ``to_numpy`` before use.
    n_neighbour : int, optional
        Number of neighbours requested from ``kneighbors_graph``,
        by default 3
    metric : str, optional
        Metric used for the neighbour search and, in 'distance' mode, for
        the edge weights, by default 'cosine'
    mode : str, optional
        {'connectivity', 'distance'}, by default 'distance'. Passed to
        ``kneighbors_graph``. With 'distance', connected pairs are weighted
        by their pairwise distance under ``metric``; otherwise every
        connected pair gets weight 1.
    normalise : bool, optional
        If True, return the symmetric normalised Laplacian
        ``I - D^(-1/2) W D^(-1/2)``; if False, return ``D - W``, where ``D``
        is the diagonal matrix of row sums of ``W``. By default True

    Returns
    -------
    numpy.ndarray
        Laplacian matrix of shape ``(n, n)``, with ``n = X.shape[0]``.

    Notes
    -----
    ``D`` is diagonal, so ``D^(-1/2)`` is computed element-wise from the
    degree vector rather than with a dense matrix square root and inverse.
    Rows whose degree is not positive (e.g. a sample whose neighbours are all
    at distance 0) get an all-zero scaling, and their diagonal entry is set
    to 0, instead of raising a singular-matrix error.
    """
    x_np = to_numpy(X)
    n = x_np.shape[0]
    knn_graph = kneighbors_graph(x_np, n_neighbour, metric=metric, mode=mode).toarray()

    # Symmetrise: keep an edge if it appears in either direction.
    knn_idx = np.logical_or(knn_graph, knn_graph.T)

    W = np.zeros((n, n))
    if mode == "distance":
        graph_kernel = pairwise_distances(x_np, metric=metric)
        W[knn_idx] = graph_kernel[knn_idx]
    else:
        W[knn_idx] = 1

    degree = np.sum(W, axis=1)
    if not normalise:
        return np.asarray(np.diag(degree) - W)

    # Element-wise D^(-1/2); isolated vertices keep a zero scale factor.
    connected = degree > 0
    inv_sqrt_degree = np.zeros_like(degree)
    inv_sqrt_degree[connected] = 1.0 / np.sqrt(degree[connected])

    scaled_w = inv_sqrt_degree[:, np.newaxis] * (W * inv_sqrt_degree[np.newaxis, :])
    lap_mat = np.diag(connected.astype(W.dtype)) - scaled_w
    return np.asarray(lap_mat)
[docs]
def mmd_coef(ns, nt, ys=None, yt=None, kind="marginal", mu=0.5):
    """Build the MMD coefficient matrix for ``ns`` source and ``nt`` target samples.

    Parameters
    ----------
    ns : int
        Number of source samples.
    nt : int
        Number of target samples.
    ys : array-like, optional
        Source labels. Required for the joint coefficients; when omitted the
        marginal matrix is returned regardless of ``kind``.
    yt : array-like, optional
        Target labels. When given, they must contain exactly the same set of
        classes as ``ys``; when omitted, the target part of every per-class
        vector is left at zero.
    kind : str, optional
        'marginal' or 'joint', by default 'marginal'.
    mu : float, optional
        Weight of the conditional term in the joint matrix
        ``(1 - mu) * marginal + mu * conditional``, by default 0.5

    Returns
    -------
    numpy.ndarray
        Coefficient matrix of shape ``(ns + nt, ns + nt)``.

    Raises
    ------
    ValueError
        If ``kind`` is 'joint' and the classes in ``ys`` and ``yt`` differ.
    """
    src_labels = None if ys is None else to_numpy(ys)
    tgt_labels = None if yt is None else to_numpy(yt)

    # Marginal term: +1/ns for source rows, -1/nt for target rows.
    total = ns + nt
    marginal_vec = np.zeros((total, 1))
    marginal_vec[:ns, 0] = 1.0 / ns
    marginal_vec[ns:, 0] = -1.0 / nt
    marginal = np.dot(marginal_vec, marginal_vec.T)

    if kind != "joint" or src_labels is None:
        return np.asarray(marginal)

    labels = np.unique(src_labels)
    if tgt_labels is not None and not np.array_equal(labels, np.unique(tgt_labels)):
        raise ValueError("Source and target domain should have the same labels")

    # Conditional term: sum of per-class outer products.
    conditional = 0
    for label in labels:
        src_hits = np.where(src_labels == label)
        src_vec = np.zeros([ns, 1])
        src_vec[src_hits] = 1.0 / (src_hits[0].shape[0])

        tgt_vec = np.zeros([nt, 1])
        if tgt_labels is not None:
            tgt_rows = np.where(tgt_labels == label)[0]
            tgt_vec[tgt_rows] = -1.0 / tgt_rows.shape[0]

        class_vec = np.vstack((src_vec, tgt_vec))
        inf_rows = np.where(np.isinf(class_vec))[0]
        class_vec[inf_rows] = 0
        conditional = conditional + np.dot(class_vec, class_vec.T)

    joint = (1 - mu) * marginal + mu * conditional
    return np.asarray(joint)
[docs]
def centering_matrix(size, dtype=np.float64):
    """Return the ``size`` x ``size`` centering matrix ``I - (1 / size) * ones``.

    Parameters
    ----------
    size : int
        Number of rows and columns.
    dtype : data-type, optional
        dtype of the returned matrix, by default ``np.float64``

    Returns
    -------
    numpy.ndarray
        Identity matrix minus a constant matrix filled with ``1 / size``.
    """
    identity = np.eye(size, dtype=dtype)
    uniform_share = np.full((size, size), 1.0 / size, dtype=dtype)
    return identity - uniform_share
[docs]
def centered_kernel_matrix(X, kernel="linear", metric=None, filter_params=True, **kwargs):
    """Compute the doubly centered kernel matrix ``H K H`` for the samples in ``X``.

    Parameters
    ----------
    X : array-like
        Samples, converted with ``to_numpy`` before use.
    kernel : str, optional
        Kernel name handed to ``pairwise_kernels`` when ``metric`` is None,
        by default 'linear'
    metric : optional
        When not None, used instead of ``kernel`` as the ``metric`` argument
        of ``pairwise_kernels``.
    filter_params : bool, optional
        Forwarded to ``pairwise_kernels``, by default True
    **kwargs
        Extra keyword arguments forwarded to ``pairwise_kernels``.

    Returns
    -------
    numpy.ndarray
        ``H K H``, where ``K`` is the kernel matrix with NaN entries replaced
        by zero and ``H`` is the centering matrix of matching size and dtype.
    """
    samples = to_numpy(X)
    chosen_metric = metric if metric is not None else kernel
    gram = pairwise_kernels(samples, metric=chosen_metric, filter_params=filter_params, **kwargs)

    # NaN kernel values are zeroed in place before centering.
    nan_mask = np.isnan(gram)
    gram[nan_mask] = 0

    centerer = centering_matrix(gram.shape[0], dtype=gram.dtype)
    return np.asarray(multi_dot([centerer, gram, centerer]))
[docs]
def hsic_grad_term(w, X, covariates):
    """Compute ``X.T @ Kc @ X @ w`` for linear-kernel HSIC regularization.

    ``Kc`` is the centered kernel matrix of ``covariates`` as returned by
    ``centered_kernel_matrix`` with its default (linear) kernel, i.e.
    ``H C C.T H``.

    Parameters
    ----------
    w : array-like
        Weights, converted with ``to_numpy``.
    X : array-like
        Samples, converted with ``to_numpy``.
    covariates : array-like
        Covariates whose centered kernel matrix is used.

    Returns
    -------
    numpy.ndarray
        The product ``X.T @ Kc @ X @ w``.
    """
    weights = to_numpy(w)
    samples = to_numpy(X)
    covariate_kernel = to_numpy(centered_kernel_matrix(covariates))
    factors = [samples.T, covariate_kernel, samples, weights]
    return np.asarray(multi_dot(factors))
[docs]
def kernel_fit_matrices(X, kernel="linear", metric=None, filter_params=True, **kwargs):
    """Prepare the kernel, identity, and centering matrices commonly needed at fit time.

    Parameters
    ----------
    X : array-like
        Samples, converted with ``to_numpy`` before use.
    kernel : str, optional
        Kernel name handed to ``pairwise_kernels`` when ``metric`` is None,
        by default 'linear'
    metric : optional
        When not None, used instead of ``kernel`` as the ``metric`` argument
        of ``pairwise_kernels``.
    filter_params : bool, optional
        Forwarded to ``pairwise_kernels``, by default True
    **kwargs
        Extra keyword arguments forwarded to ``pairwise_kernels``.

    Returns
    -------
    tuple
        ``(kernel_matrix, identity, centering, n)`` where ``n`` is
        ``X.shape[0]``, ``kernel_matrix`` has its NaN entries replaced by
        zero, ``identity`` is ``np.eye(n)`` and ``centering`` is the
        centering matrix of size ``n`` in the kernel matrix's dtype.
    """
    samples = to_numpy(X)
    n_samples = samples.shape[0]
    chosen_metric = metric if metric is not None else kernel
    gram = pairwise_kernels(samples, metric=chosen_metric, filter_params=filter_params, **kwargs)

    # NaN kernel values are zeroed in place.
    nan_mask = np.isnan(gram)
    gram[nan_mask] = 0

    identity = np.eye(n_samples)
    centerer = centering_matrix(n_samples, dtype=gram.dtype)
    return np.asarray(gram), np.asarray(identity), np.asarray(centerer), n_samples
[docs]
def base_init(X, kernel="linear", **kwargs):
    """Return the fit-time matrices for ``X`` by delegating to ``kernel_fit_matrices``.

    ``kernel`` and any extra keyword arguments are forwarded unchanged; the
    result is the ``(kernel_matrix, identity, centering, n)`` tuple produced
    by ``kernel_fit_matrices``.
    """
    fit_matrices = kernel_fit_matrices(X, kernel=kernel, **kwargs)
    return fit_matrices