Source code for kalelinear.estimator._manifold_learn

"""
@author: Shuo Zhou, The University of Sheffield, szhou@sheffield.ac.uk

Ref: Belkin, M., Niyogi, P., & Sindhwani, V. (2006). Manifold regularization:
A geometric framework for learning from labeled and unlabeled examples.
Journal of Machine Learning Research, 7(Nov), 2399-2434.
"""

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils.validation import check_is_fitted

# import cvxpy as cvx
# from cvxpy.error import SolverError
from kalelinear.estimator.base import BaseKaleEstimator
from kalelinear.utils import base_init, lap_norm, to_numpy


class LapSVM(BaseKaleEstimator):
    """Laplacian Regularized Support Vector Machine (semi-supervised, kernel-based)."""

    def __init__(
        self,
        C=1.0,
        kernel="linear",
        gamma_=1.0,
        solver="osqp",
        k_neighbour=3,
        manifold_metric="cosine",
        knn_mode="distance",
        **kwargs,
    ):
        """Laplacian Regularized Support Vector Machine

        Parameters
        ----------
        C : float, optional
            param for importance of slack variable, by default 1.0
        kernel : str, optional
            'rbf' | 'linear' | 'poly', by default 'linear'
        gamma_ : float, optional
            param for manifold regularisation, by default 1.0
        solver : str, optional
            quadratic programming solver, [cvxopt, osqp], by default 'osqp'
        k_neighbour : int, optional
            number of nearest neighbours for each sample in manifold
            regularisation, by default 3
        manifold_metric : str, optional
            The distance metric used to calculate the k-Neighbors for each
            sample point. The DistanceMetric class gives a list of available
            metrics. By default 'cosine'.
        knn_mode : str, optional
            {'connectivity', 'distance'}, by default 'distance'. Type of
            returned matrix: 'connectivity' will return the connectivity
            matrix with ones and zeros, and 'distance' will return the
            distances between neighbors according to the given metric.
        **kwargs:
            kernel params, forwarded to the kernel computation
        """
        self.C = C
        self.gamma_ = gamma_
        self.kernel = kernel
        self.solver = solver
        self.kwargs = kwargs
        self.manifold_metric = manifold_metric
        self.k_neighbour = k_neighbour
        self.knn_mode = knn_mode
        # Labels are encoded as -1 / +1 for the solver and decoded in predict().
        self._lb = LabelBinarizer(pos_label=1, neg_label=-1)

    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)
        y : array-like
            Label, shape (n_labeled_samples, ) where
            n_labeled_samples <= n_samples

        Returns
        -------
        self
            The fitted estimator.
        """
        X = to_numpy(X)
        y = to_numpy(y)
        x_kernel_matrix, unit_matrix, centering_matrix, n = base_init(X, kernel=self.kernel, **self.kwargs)

        # NOTE(review): the regulariser below is built on ``centering_matrix``
        # (third value returned by base_init) in both branches, whereas the
        # cited Belkin et al. formulation uses an identity term -- confirm
        # this is intended. Left unchanged here.
        if self.gamma_ == 0:
            # No manifold regularisation: skip the graph Laplacian entirely.
            Q_ = centering_matrix
        else:
            # Pass the configured metric so that ``manifold_metric`` is
            # actually honoured (it was previously stored but never used).
            lap_mat = lap_norm(
                X,
                n_neighbour=self.k_neighbour,
                metric=self.manifold_metric,
                mode=self.knn_mode,
            )
            Q_ = centering_matrix + self.gamma_ * np.dot(lap_mat, x_kernel_matrix)

        y_ = self._lb.fit_transform(y)
        # Only the values returned by the solver are stored; support vectors
        # themselves are not materialised on the estimator.
        self.coef_, self.support_ = self._solve_semi_dual(x_kernel_matrix, y_, Q_, self.C, self.solver)

        # Training data is kept because decision_function() evaluates the
        # kernel between new samples and self.X.
        self.X = X
        self.y = y

        return self

    def decision_function(self, X):
        """Evaluates the decision function for the samples in X.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)

        Returns
        -------
        array-like
            decision scores, shape (n_samples,) for binary classification,
            (n_samples, n_classes) for multi-class cases
        """
        check_is_fitted(self, "X")
        check_is_fitted(self, "y")
        x_np = to_numpy(X)
        # Kernel between the query samples and the stored training samples.
        x_kernel_matrix = pairwise_kernels(x_np, self.X, metric=self.kernel, filter_params=True, **self.kwargs)
        scores = np.dot(x_kernel_matrix, self.coef_)

        # No intercept term is added to the scores.
        return scores

    def predict(self, X):
        """Perform classification on samples in X.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)

        Returns
        -------
        array-like
            predicted labels, shape (n_samples,)
        """
        dec = to_numpy(self.decision_function(X))
        # Scores are thresholded at 0 and mapped back to the original labels.
        return self._lb.inverse_transform(dec, threshold=0)

    def fit_predict(self, X, y):
        """Fit the model according to the given training data and then
        perform classification on samples in X.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)
        y : array-like
            Label, shape (n_labeled_samples, ) where
            n_labeled_samples <= n_samples

        Returns
        -------
        array-like
            predicted labels, shape (n_samples,)
        """
        self.fit(X, y)
        return self.predict(X)
class LapRLS(BaseKaleEstimator):
    """Laplacian Regularized Least Squares (semi-supervised, kernel-based)."""

    def __init__(
        self,
        kernel="linear",
        gamma_=1.0,
        sigma_=1.0,
        k_neighbour=5,
        manifold_metric="cosine",
        knn_mode="distance",
        **kwargs,
    ):
        """Laplacian Regularized Least Squares

        Parameters
        ----------
        kernel : str, optional
            'rbf' | 'linear' | 'poly', by default 'linear'
        gamma_ : float, optional
            manifold regularisation param, by default 1.0
        sigma_ : float, optional
            l2 regularisation param, by default 1.0
        k_neighbour : int, optional
            number of nearest neighbours for each sample in manifold
            regularisation, by default 5
        manifold_metric : str, optional
            The distance metric used to calculate the k-Neighbors for each
            sample point. The DistanceMetric class gives a list of available
            metrics. By default 'cosine'.
        knn_mode : str, optional
            {'connectivity', 'distance'}, by default 'distance'. Type of
            returned matrix: 'connectivity' will return the connectivity
            matrix with ones and zeros, and 'distance' will return the
            distances between neighbors according to the given metric.
        kwargs:
            kernel params, forwarded to the kernel computation
        """
        self.kwargs = kwargs
        self.kernel = kernel
        self.gamma_ = gamma_
        self.sigma_ = sigma_
        self.k_neighbour = k_neighbour
        self.knn_mode = knn_mode
        self.manifold_metric = manifold_metric
        # Labels are encoded as -1 / +1 for the solver and decoded in predict().
        self._lb = LabelBinarizer(pos_label=1, neg_label=-1)

    def fit(self, X, y):
        """Fit the model according to the given training data.

        The labels in ``y`` are matched to the first ``n_labeled_samples``
        rows of ``X``; any remaining rows are treated as unlabelled.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)
        y : array-like
            Label, shape (n_labeled_samples, ) where
            n_labeled_samples <= n_samples

        Returns
        -------
        self
            The fitted estimator.

        Raises
        ------
        ValueError
            If more labels than samples are given.
        """
        X = to_numpy(X)
        y = to_numpy(y)
        nl = y.shape[0]
        x_kernel_matrix, unit_matrix, _centering_matrix, n = base_init(X, kernel=self.kernel, **self.kwargs)
        if nl > n:
            # Fail with a readable message instead of a broadcasting error
            # from the slice assignment below.
            raise ValueError(f"Number of labels ({nl}) exceeds the number of samples ({n}).")

        # J selects the labelled samples: identity on the leading nl x nl
        # block, zeros elsewhere.
        J = np.zeros((n, n))
        J[:nl, :nl] = np.eye(nl)

        if self.gamma_ != 0:
            lap_mat = lap_norm(X, n_neighbour=self.k_neighbour, metric=self.manifold_metric, mode=self.knn_mode)
            selector = J + self.gamma_ * lap_mat
        else:
            # No manifold regularisation: skip the graph Laplacian entirely.
            selector = J

        # Both branches share the same l2 term. Previously the gamma_ == 0
        # branch added sigma_ * centering_matrix instead of
        # sigma_ * unit_matrix, making the objective discontinuous at
        # gamma_ = 0 and diverging from the identity term in the cited
        # Belkin et al. closed-form solution.
        Q_ = np.dot(selector, x_kernel_matrix) + self.sigma_ * unit_matrix

        y_ = self._lb.fit_transform(y)
        self.coef_ = self._solve_semi_ls(Q_, y_)

        # Training data is kept because decision_function() evaluates the
        # kernel between new samples and self.X.
        self.X = X
        self.y = y

        return self

    def predict(self, X):
        """Perform classification on samples in X.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)

        Returns
        -------
        array-like
            predicted labels, shape (n_samples,)
        """
        dec = to_numpy(self.decision_function(X))
        # Scores are thresholded at 0 and mapped back to the original labels.
        return self._lb.inverse_transform(dec, threshold=0)

    def decision_function(self, X):
        """Evaluates the decision function for the samples in X

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)

        Returns
        -------
        array-like
            decision scores, shape (n_samples,) for binary classification,
            (n_samples, n_classes) for multi-class cases
        """
        # Same fitted-state guard as LapSVM.decision_function, so calling
        # this before fit() raises NotFittedError rather than a bare
        # AttributeError.
        check_is_fitted(self, "X")
        check_is_fitted(self, "y")
        x_np = to_numpy(X)
        # Kernel between the query samples and the stored training samples.
        x_kernel_matrix = pairwise_kernels(x_np, self.X, metric=self.kernel, filter_params=True, **self.kwargs)
        scores = np.dot(x_kernel_matrix, self.coef_)

        return scores

    def fit_predict(self, X, y):
        """Fit the model according to the given training data and then
        perform classification on samples in X.

        Parameters
        ----------
        X : array-like
            Input data, shape (n_samples, n_features)
        y : array-like
            Label, shape (n_labeled_samples, ) where
            n_labeled_samples <= n_samples

        Returns
        -------
        array-like
            predicted labels, shape (n_samples,)
        """
        self.fit(X, y)
        return self.predict(X)