Source code for kalelinear.utils._base

import numpy as np
from numpy.linalg import inv, multi_dot
from scipy.linalg import sqrtm
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.neighbors import kneighbors_graph

from kalelinear.utils._backend import to_numpy


def lap_norm(X, n_neighbour=3, metric="cosine", mode="distance", normalise=True):
    """Build the Laplacian of a symmetrised k-nearest-neighbour graph over ``X``.

    An edge is kept between two samples when either one lists the other among
    its nearest neighbours. With ``mode="distance"`` the edge weight is the
    pairwise distance under ``metric``; otherwise every edge has weight 1.

    Parameters
    ----------
    X : array-like
        Input samples, one per row. Converted with ``to_numpy`` before use.
    n_neighbour : int, optional
        Number of neighbours per sample in the k-NN graph, by default 3.
    metric : str, optional
        Metric forwarded to ``kneighbors_graph`` and ``pairwise_distances``,
        by default 'cosine'.
    mode : str, optional
        {'connectivity', 'distance'}, by default 'distance'. Type of returned
        matrix: 'connectivity' will return the connectivity matrix with ones
        and zeros, and 'distance' will return the distances between
        neighbors according to the given metric.
    normalise : bool, optional
        If True, return the symmetric normalised Laplacian
        ``I - D^{-1/2} W D^{-1/2}``; if False, return ``D - W``, where ``W``
        is the weight matrix and ``D`` its diagonal degree matrix.
        By default True.

    Returns
    -------
    numpy.ndarray
        Square Laplacian matrix with one row and column per sample.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``normalise`` is True and some sample has a non-positive degree,
        so that ``D^{-1/2}`` does not exist.
    """
    x_np = to_numpy(X)
    n = x_np.shape[0]
    knn_graph = kneighbors_graph(x_np, n_neighbour, metric=metric, mode=mode).toarray()
    # Symmetrise: keep an edge when either endpoint selected the other.
    knn_idx = np.logical_or(knn_graph, knn_graph.T)

    W = np.zeros((n, n))
    if mode == "distance":
        W[knn_idx] = pairwise_distances(x_np, metric=metric)[knn_idx]
    else:
        W[knn_idx] = 1

    degree = np.sum(W, axis=1)
    if not normalise:
        return np.asarray(np.diag(degree) - W)

    # D is diagonal, so D^{-1/2} is an element-wise operation on the degree
    # vector; no dense matrix square root or inversion is required.
    if np.any(degree <= 0):
        raise np.linalg.LinAlgError(
            "Cannot normalise the graph Laplacian: at least one sample has a "
            "non-positive degree, so the degree matrix is not invertible."
        )
    inv_sqrt_degree = 1.0 / np.sqrt(degree)
    lap_mat = np.eye(n) - W * np.outer(inv_sqrt_degree, inv_sqrt_degree)
    return np.asarray(lap_mat)
def mmd_coef(ns, nt, ys=None, yt=None, kind="marginal", mu=0.5):
    """Build the MMD coefficient matrix for ``ns`` source and ``nt`` target samples.

    The marginal matrix is ``e e^T`` where ``e`` holds ``1/ns`` for the first
    ``ns`` entries and ``-1/nt`` for the remaining ``nt`` entries. When
    ``kind == "joint"`` and ``ys`` is given, a class-conditional matrix is
    accumulated over the classes found in ``ys`` and the result is
    ``(1 - mu) * marginal + mu * conditional``.

    Parameters
    ----------
    ns : int
        Number of source samples.
    nt : int
        Number of target samples.
    ys : array-like, optional
        Source labels, converted with ``to_numpy``. By default None.
    yt : array-like, optional
        Target labels, converted with ``to_numpy``. By default None, in which
        case the target part of each class-conditional vector stays zero.
    kind : str, optional
        ``"joint"`` enables the class-conditional term (only if ``ys`` is
        given); any other value yields the marginal matrix. By default
        'marginal'.
    mu : float, optional
        Weight of the class-conditional term, by default 0.5.

    Returns
    -------
    numpy.ndarray
        Coefficient matrix of shape ``(ns + nt, ns + nt)``.

    Raises
    ------
    ValueError
        If ``yt`` is given and its set of labels differs from that of ``ys``.
    """
    src_labels = None if ys is None else to_numpy(ys)
    tgt_labels = None if yt is None else to_numpy(yt)

    coef = np.concatenate((np.full((ns, 1), 1.0 / ns), np.full((nt, 1), -1.0 / nt)))
    marginal = coef @ coef.T

    # Without source labels, or for any non-joint kind, only the marginal term applies.
    if kind != "joint" or src_labels is None:
        return np.asarray(marginal)

    classes = np.unique(src_labels)
    if tgt_labels is not None and not np.array_equal(classes, np.unique(tgt_labels)):
        raise ValueError("Source and target domain should have the same labels")

    conditional = 0
    for label in classes:
        src_idx = np.where(src_labels == label)
        src_part = np.zeros([ns, 1])
        src_part[src_idx] = 1.0 / src_idx[0].shape[0]

        tgt_part = np.zeros([nt, 1])
        if tgt_labels is not None:
            tgt_rows = np.where(tgt_labels == label)[0]
            tgt_part[tgt_rows] = -1.0 / tgt_rows.shape[0]

        class_coef = np.vstack((src_part, tgt_part))
        # Zero out any row holding an infinite coefficient.
        class_coef[np.where(np.isinf(class_coef))[0]] = 0
        conditional = conditional + class_coef @ class_coef.T

    return np.asarray((1 - mu) * marginal + mu * conditional)
def centering_matrix(size, dtype=np.float64):
    """Return the ``size`` x ``size`` centering matrix ``I - (1 / size) * 1 1^T``.

    Parameters
    ----------
    size : int
        Number of rows and columns of the returned matrix.
    dtype : data-type, optional
        dtype of the returned matrix, by default ``np.float64``.

    Returns
    -------
    numpy.ndarray
        The identity matrix minus a constant matrix filled with ``1 / size``.
    """
    identity = np.eye(size, dtype=dtype)
    averaging = np.full((size, size), 1.0 / size, dtype=dtype)
    return identity - averaging
def centered_kernel_matrix(X, kernel="linear", metric=None, filter_params=True, **kwargs):
    """Return the double-centered kernel matrix ``H K H`` of the samples in ``X``.

    Parameters
    ----------
    X : array-like
        Input samples, converted with ``to_numpy`` before use.
    kernel : str, optional
        Kernel used when ``metric`` is None, by default 'linear'.
    metric : optional
        If not None, used instead of ``kernel`` as the ``metric`` argument of
        ``pairwise_kernels``. By default None.
    filter_params : bool, optional
        Forwarded to ``pairwise_kernels``, by default True.
    **kwargs
        Extra keyword arguments forwarded to ``pairwise_kernels``.

    Returns
    -------
    numpy.ndarray
        Kernel matrix with NaN entries replaced by 0, multiplied on both sides
        by the centering matrix of matching size and dtype.
    """
    samples = to_numpy(X)
    if metric is None:
        metric = kernel

    gram = pairwise_kernels(samples, metric=metric, filter_params=filter_params, **kwargs)
    # NaN kernel values are treated as zero before centering.
    gram[np.isnan(gram)] = 0

    centerer = centering_matrix(gram.shape[0], dtype=gram.dtype)
    return np.asarray(multi_dot([centerer, gram, centerer]))
def hsic_grad_term(w, X, covariates):
    """Compute ``X.T @ K_c @ X @ w`` for linear-kernel HSIC regularization.

    ``K_c`` is the centered kernel matrix of ``covariates`` as returned by
    ``centered_kernel_matrix`` with its default arguments.

    Parameters
    ----------
    w : array-like
        Weight vector or matrix, converted with ``to_numpy``.
    X : array-like
        Input samples, converted with ``to_numpy``.
    covariates : array-like
        Covariates whose centered kernel matrix weights the product.

    Returns
    -------
    numpy.ndarray
        The product ``X.T @ K_c @ X @ w``.
    """
    weights = to_numpy(w)
    features = to_numpy(X)
    covariate_kernel = to_numpy(centered_kernel_matrix(covariates))
    factors = [features.T, covariate_kernel, features, weights]
    return np.asarray(multi_dot(factors))
def kernel_fit_matrices(X, kernel="linear", metric=None, filter_params=True, **kwargs):
    """Prepare the kernel, identity, and centering matrices commonly needed at fit time.

    Parameters
    ----------
    X : array-like
        Input samples, converted with ``to_numpy`` before use.
    kernel : str, optional
        Kernel used when ``metric`` is None, by default 'linear'.
    metric : optional
        If not None, used instead of ``kernel`` as the ``metric`` argument of
        ``pairwise_kernels``. By default None.
    filter_params : bool, optional
        Forwarded to ``pairwise_kernels``, by default True.
    **kwargs
        Extra keyword arguments forwarded to ``pairwise_kernels``.

    Returns
    -------
    tuple
        ``(kernel_matrix, identity, centering, n)``: the kernel matrix of
        ``X`` with NaN entries replaced by 0, the ``n`` x ``n`` identity
        matrix, the ``n`` x ``n`` centering matrix in the kernel matrix's
        dtype, and ``n``, the number of rows of ``X``.
    """
    samples = to_numpy(X)
    n = samples.shape[0]
    if metric is None:
        metric = kernel

    gram = pairwise_kernels(samples, metric=metric, filter_params=filter_params, **kwargs)
    # NaN kernel values are treated as zero.
    gram[np.isnan(gram)] = 0

    identity = np.eye(n)
    centerer = centering_matrix(n, dtype=gram.dtype)
    return np.asarray(gram), np.asarray(identity), np.asarray(centerer), n
def base_init(X, kernel="linear", **kwargs):
    """Return the fit-time matrices for ``X`` by delegating to ``kernel_fit_matrices``.

    Parameters
    ----------
    X : array-like
        Input samples.
    kernel : str, optional
        Kernel forwarded to ``kernel_fit_matrices``, by default 'linear'.
    **kwargs
        Extra keyword arguments forwarded to ``kernel_fit_matrices``.

    Returns
    -------
    tuple
        Whatever ``kernel_fit_matrices`` returns for the same arguments.
    """
    fit_matrices = kernel_fit_matrices(X, kernel=kernel, **kwargs)
    return fit_matrices