Source code for kalelinear.utils._base

import numpy as np
from numpy.linalg import inv, multi_dot
from scipy.linalg import sqrtm
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.neighbors import kneighbors_graph

from kalelinear.utils._backend import to_numpy



[docs]
def lap_norm(X, n_neighbour=3, metric="cosine", mode="distance", normalise=True):
    """Build the graph Laplacian of a symmetrised k-nearest-neighbour graph.

    A k-nearest-neighbour graph is computed with ``kneighbors_graph`` and made
    symmetric: two samples are connected when either one is a neighbour of
    the other. The Laplacian of the resulting weight matrix ``W`` is returned.

    Parameters
    ----------
    X : array-like
        Input samples. Converted with ``to_numpy``; the first axis indexes
        the samples.
    n_neighbour : int, optional
        Number of neighbours requested from ``kneighbors_graph`` for each
        sample, by default 3
    metric : str, optional
        Metric passed to ``kneighbors_graph`` and, when ``mode`` is
        'distance', to ``pairwise_distances``, by default 'cosine'
    mode : str, optional
        {'connectivity', 'distance'}, by default 'distance'. With 'distance'
        every retained edge is weighted by the pairwise distance between its
        two samples; otherwise every retained edge has weight 1.
    normalise : bool, optional
        If True, return the symmetric normalised Laplacian
        ``I - D^{-1/2} W D^{-1/2}``; if False, return the unnormalised
        Laplacian ``D - W``, where ``D`` is the diagonal matrix of row sums
        of ``W``. By default True

    Returns
    -------
    numpy.ndarray
        The ``(n, n)`` Laplacian matrix, ``n`` being the number of samples.

    Notes
    -----
    Samples whose degree (row sum of ``W``) is not positive are treated as
    isolated when normalising: their row and column in the result are zero.
    This can happen in 'distance' mode when all neighbours of a sample lie
    at distance zero.
    """
    x_np = to_numpy(X)
    n = x_np.shape[0]
    knn_graph = kneighbors_graph(x_np, n_neighbour, metric=metric, mode=mode).toarray()

    # Keep an edge when it appears in either direction of the kNN graph.
    knn_idx = np.logical_or(knn_graph, knn_graph.T)
    W = np.zeros((n, n))
    if mode == "distance":
        graph_kernel = pairwise_distances(x_np, metric=metric)
        W[knn_idx] = graph_kernel[knn_idx]
    else:
        W[knn_idx] = 1

    degree = np.sum(W, axis=1)
    if not normalise:
        return np.asarray(np.diag(degree) - W)

    # D is diagonal, so D^{-1/2} is an element-wise operation on the degree
    # vector; no dense matrix square root or inverse is required, and
    # zero-degree samples no longer make the computation singular.
    connected = degree > 0
    inv_sqrt_degree = np.zeros(n)
    inv_sqrt_degree[connected] = 1.0 / np.sqrt(degree[connected])
    scaled_w = W * inv_sqrt_degree[:, np.newaxis] * inv_sqrt_degree[np.newaxis, :]

    # Diagonal is 1 for connected samples and 0 for isolated ones.
    lap_mat = np.diag(connected.astype(W.dtype)) - scaled_w
    return np.asarray(lap_mat)




[docs]
def mmd_coef(ns, nt, ys=None, yt=None, kind="marginal", mu=0.5):
    """Build the MMD coefficient matrix for stacked source and target samples.

    Rows and columns are ordered with the ``ns`` source samples first,
    followed by the ``nt`` target samples.

    Parameters
    ----------
    ns : int
        Number of source samples.
    nt : int
        Number of target samples.
    ys : array-like, optional
        Source labels, converted with ``to_numpy``. Required for the joint
        coefficients; by default None
    yt : array-like, optional
        Target labels, converted with ``to_numpy``. When given, the set of
        target labels must equal the set of source labels; by default None
    kind : str, optional
        'joint' adds per-class (conditional) coefficients when ``ys`` is
        given; any other value yields the marginal coefficients only.
        By default 'marginal'
    mu : float, optional
        Weight of the conditional term in the joint coefficients,
        ``(1 - mu) * marginal + mu * conditional``; by default 0.5

    Returns
    -------
    numpy.ndarray
        The ``(ns + nt, ns + nt)`` coefficient matrix.

    Raises
    ------
    ValueError
        If ``kind`` is 'joint', both label arrays are given, and their sets
        of unique labels differ.
    """
    src_labels = None if ys is None else to_numpy(ys)
    tgt_labels = None if yt is None else to_numpy(yt)

    # Marginal term: +1/ns on source rows, -1/nt on target rows.
    total = ns + nt
    weights = np.zeros((total, 1))
    weights[:ns, 0] = 1.0 / ns
    weights[ns:, 0] = -1.0 / nt
    marginal = weights @ weights.T

    if kind != "joint" or src_labels is None:
        return np.asarray(marginal)

    classes = np.unique(src_labels)
    if tgt_labels is not None and not np.array_equal(classes, np.unique(tgt_labels)):
        raise ValueError("Source and target domain should have the same labels")

    # Conditional term: one rank-one matrix per class, summed over classes.
    conditional = np.zeros((total, total))
    for label in classes:
        src_hits = np.nonzero(src_labels == label)
        src_part = np.zeros([ns, 1])
        src_part[src_hits] = 1.0 / src_hits[0].shape[0]

        tgt_part = np.zeros([nt, 1])
        if tgt_labels is not None:
            tgt_rows = np.nonzero(tgt_labels == label)[0]
            tgt_part[tgt_rows] = -1.0 / tgt_rows.shape[0]

        class_weights = np.vstack((src_part, tgt_part))
        # Scrub any infinite weights before forming the outer product.
        class_weights[np.nonzero(np.isinf(class_weights))[0]] = 0
        conditional = conditional + class_weights @ class_weights.T

    joint = (1 - mu) * marginal + mu * conditional
    return np.asarray(joint)




[docs]
def centering_matrix(size, dtype=np.float64):
    """Return the ``size`` x ``size`` centering matrix ``I - (1 / size) * J``.

    ``I`` is the identity and ``J`` the all-ones matrix; both terms are
    created with the requested ``dtype``.
    """
    identity = np.eye(size, dtype=dtype)
    averaging = np.full((size, size), 1.0 / size, dtype=dtype)
    return identity - averaging




[docs]
def centered_kernel_matrix(X, kernel="linear", metric=None, filter_params=True, **kwargs):
    """Compute a centered kernel matrix for samples in X.

    The kernel matrix ``K`` is evaluated with ``pairwise_kernels``, NaN
    entries are replaced by zero, and ``H K H`` is returned, where ``H`` is
    the matrix produced by ``centering_matrix``.

    Parameters
    ----------
    X : array-like
        Input samples, converted with ``to_numpy``.
    kernel : str, optional
        Kernel passed to ``pairwise_kernels`` as ``metric`` when ``metric``
        is None, by default 'linear'
    metric : optional
        When not None, used instead of ``kernel``, by default None
    filter_params : bool, optional
        Forwarded to ``pairwise_kernels``, by default True
    **kwargs
        Additional keyword arguments forwarded to ``pairwise_kernels``.

    Returns
    -------
    numpy.ndarray
        The centered kernel matrix.
    """
    x_np = to_numpy(X)
    kernel_metric = kernel if metric is None else metric

    kernel_matrix = pairwise_kernels(x_np, metric=kernel_metric, filter_params=filter_params, **kwargs)
    nan_mask = np.isnan(kernel_matrix)
    if nan_mask.any():
        # Zero the NaNs on a copy: the array returned by pairwise_kernels may
        # be the caller's own data (presumably the case for
        # metric="precomputed" - verify against the installed scikit-learn),
        # which must not be modified as a side effect.
        kernel_matrix = np.array(kernel_matrix, copy=True)
        kernel_matrix[nan_mask] = 0
    h_matrix = centering_matrix(kernel_matrix.shape[0], dtype=kernel_matrix.dtype)
    centered_kernel = multi_dot([h_matrix, kernel_matrix, h_matrix])
    return np.asarray(centered_kernel)




[docs]
def hsic_grad_term(w, X, covariates):
    """Compute X.T H C C.T H X w for linear-kernel HSIC regularization.

    ``H C C.T H`` is obtained from ``centered_kernel_matrix(covariates)``
    with its default (linear) kernel. All inputs are converted with
    ``to_numpy`` and the product is returned as a NumPy array.
    """
    weights = to_numpy(w)
    features = to_numpy(X)
    covariate_kernel = to_numpy(centered_kernel_matrix(covariates))
    # multi_dot picks the multiplication order for the four-factor chain.
    factors = [features.T, covariate_kernel, features, weights]
    return np.asarray(multi_dot(factors))




[docs]
def kernel_fit_matrices(X, kernel="linear", metric=None, filter_params=True, **kwargs):
    """Prepare common fit-time kernel, identity, and centering matrices.

    Parameters
    ----------
    X : array-like
        Input samples, converted with ``to_numpy``; the first axis indexes
        the samples.
    kernel : str, optional
        Kernel passed to ``pairwise_kernels`` as ``metric`` when ``metric``
        is None, by default 'linear'
    metric : optional
        When not None, used instead of ``kernel``, by default None
    filter_params : bool, optional
        Forwarded to ``pairwise_kernels``, by default True
    **kwargs
        Additional keyword arguments forwarded to ``pairwise_kernels``.

    Returns
    -------
    tuple
        ``(kernel_matrix, identity, centering, n)``: the kernel matrix of
        ``X`` with NaN entries replaced by zero, the ``n`` x ``n`` identity
        matrix, the ``n`` x ``n`` centering matrix in the kernel matrix's
        dtype, and the number of samples ``n``.
    """
    x_np = to_numpy(X)
    n = x_np.shape[0]
    kernel_metric = kernel if metric is None else metric

    x_kernel_matrix = pairwise_kernels(x_np, metric=kernel_metric, filter_params=filter_params, **kwargs)
    nan_mask = np.isnan(x_kernel_matrix)
    if nan_mask.any():
        # Zero the NaNs on a copy: the array returned by pairwise_kernels may
        # be the caller's own data (presumably the case for
        # metric="precomputed" - verify against the installed scikit-learn),
        # which must not be modified as a side effect.
        x_kernel_matrix = np.array(x_kernel_matrix, copy=True)
        x_kernel_matrix[nan_mask] = 0

    unit_matrix = np.eye(n)
    h_matrix = centering_matrix(n, dtype=x_kernel_matrix.dtype)

    return (
        np.asarray(x_kernel_matrix),
        np.asarray(unit_matrix),
        np.asarray(h_matrix),
        n,
    )




[docs]
def base_init(X, kernel="linear", **kwargs):
    """Return the fit-time matrices for ``X`` from ``kernel_fit_matrices``.

    Thin wrapper: ``X``, ``kernel`` and any extra keyword arguments are
    forwarded unchanged and the result is returned as-is.
    """
    fit_matrices = kernel_fit_matrices(X, kernel=kernel, **kwargs)
    return fit_matrices