From f1adc193ca15e6aa3a7a1a3bf18abfa5e9239c89 Mon Sep 17 00:00:00 2001 From: dmt <> Date: Fri, 18 Oct 2019 23:10:17 +0200 Subject: [PATCH] Rename scikit_adapter to ml_adapter. --- cml/ports/ml_adapter.py | 165 ++++++++++++++++++++++++++++++++++++ cml/ports/scikit_adapter.py | 86 ------------------- 2 files changed, 165 insertions(+), 86 deletions(-) create mode 100644 cml/ports/ml_adapter.py delete mode 100644 cml/ports/scikit_adapter.py diff --git a/cml/ports/ml_adapter.py b/cml/ports/ml_adapter.py new file mode 100644 index 0000000..6aa0474 --- /dev/null +++ b/cml/ports/ml_adapter.py @@ -0,0 +1,165 @@ +from collections import Counter +from abc import ABCMeta, abstractmethod + +from numpy import array, linspace, less, greater, std, argsort +from scipy.signal import argrelextrema +import sklearn.cluster +from sklearn.neighbors.kde import KernelDensity +from keras.layers import Input, Dense +from keras.models import Model +from keras.regularizers import l1 + + +# TODO (dmt): Handle algorithms without cluster initialization! 
# Maps each supported scikit-learn clustering estimator class to a pair:
#   [0] name of the estimator parameter that holds the cluster count,
#   [1] name of the fitted attribute that holds the cluster labels.
# Lookups use the exact type of the wrapped model, so subclasses of these
# estimators are not matched.
SCIKIT_CLUSTERING_TABLE = {
    sklearn.cluster.KMeans: ("n_clusters", "labels_"),
    # Birch is referenced through the public package path instead of the
    # private ``sklearn.cluster.birch`` module.
    sklearn.cluster.Birch: ("n_clusters", "labels_"),
    sklearn.cluster.SpectralClustering: ("n_clusters", "labels_"),
    sklearn.cluster.FeatureAgglomeration: ("n_clusters", "labels_"),
    sklearn.cluster.AgglomerativeClustering: ("n_clusters", "labels_"),
}


class MachineLearningModel(metaclass=ABCMeta):
    """Common interface of all model adapters in this module.

    The ABCMeta metaclass makes ``@abstractmethod`` effective: a subclass
    that does not implement ``train`` cannot be instantiated.
    """

    @abstractmethod
    def train(self, data, *args, **kwargs):
        """Fit the adapter on *data*.

        Every implementation in this module returns ``self`` so that calls
        can be chained.
        """


class FilterMethod(MachineLearningModel):
    """Adapter around a selector that offers ``fit`` and ``get_support``."""

    def __init__(self, model):
        self.__model = model

    def train(self, data, *args, **kwargs):
        """Fit the wrapped selector on *data* and return ``self``."""
        self.__model = self.__model.fit(data)
        return self

    def reduce(self, data):
        """Return the set of feature indices the selector did not keep.

        The candidate indices are ``0 .. data.feature_count - 1``; the ones
        reported by ``get_support(indices=True)`` are removed from them.
        """
        kept = set(self.__model.get_support(indices=True))
        return set(range(data.feature_count)).difference(kept)


class EmbeddedMethod(MachineLearningModel):
    """Adapter around a model that exposes ``feature_importances_``."""

    # Spelling kept as is; the attribute may be referenced from outside.
    _ONE_HUNDRET_PERCENT = 100

    def __init__(self, model):
        self.__model = model

    def train(self, data, *args, **kwargs):
        """Fit the wrapped model on *data* and return ``self``.

        The labels passed to ``fit`` are read from column ``"Z"`` of *data*.
        """
        labels = data.get_column_values("Z")
        self.__model = self.__model.fit(data, labels)
        return self

    def reduce(self):
        """Return the index of the least important feature, if any.

        The standard deviation of the importances is expressed as a floored
        percentage of the largest importance. When that percentage is at
        least 10, a one-element set with the index of the smallest
        importance is returned; otherwise an empty set is returned.
        """
        importance = self.__model.feature_importances_
        largest = max(importance)
        if not largest:
            # Nothing to compare against; also avoids dividing by zero.
            return set()

        spread = (self._ONE_HUNDRET_PERCENT * std(importance)) // largest
        if spread >= 10:
            # argsort is ascending, so position 0 is the smallest value.
            return {argsort(importance)[0]}
        # An empty set (not ``{}``, which is a dict) keeps the return type
        # the same on both paths.
        return set()


class ConstructionClusteringMLModel(MachineLearningModel):
    """Adapter around one of the estimators in SCIKIT_CLUSTERING_TABLE."""

    def __init__(self, model):
        self.__model = model
        # Starts at 2 without reading or changing the wrapped model's own
        # cluster-count parameter; the two agree once ``cluster`` is set.
        self._cluster = 2

    def _attribute_names(self):
        """Return the (cluster parameter, labels attribute) name pair.

        Raises KeyError when the wrapped model's exact type is not listed
        in SCIKIT_CLUSTERING_TABLE.
        """
        return SCIKIT_CLUSTERING_TABLE[type(self.__model)]

    def get_labels(self):
        """Return the labels attribute of the wrapped model."""
        return getattr(self.__model, self._attribute_names()[1])

    @property
    def cluster(self):
        """Cluster count last assigned through this adapter."""
        return self._cluster

    @cluster.setter
    def cluster(self, value):
        # Write to the model first, so a failed lookup leaves the cached
        # value untouched.
        setattr(self.__model, self._attribute_names()[0], value)
        self._cluster = value

    @property
    def cluster_sizes(self):
        """Counter mapping each label to the number of its occurrences."""
        return Counter(self.get_labels())

    def train(self, data, *args, **kwargs):
        """Fit the wrapped model on *data* and return ``self``."""
        self.__model.fit(data)
        return self


class KernelDensityEstimator(MachineLearningModel):
    """Adapter around a lazily created ``KernelDensity`` estimator."""

    def __init__(self, kernel="gaussian", bandwidth=3, gridsize=256):
        self.__model = None
        self.kernel = kernel
        self.bandwidth = bandwidth
        self.gridsize = gridsize

    def train(self, data, *args, **kwargs):
        """Fit the estimator on *data* and return ``self``.

        *data* is converted to an array and reshaped into a single column.
        The estimator is created on the first call from the ``kernel`` and
        ``bandwidth`` values current at that time and reused afterwards.
        """
        column = array(data).reshape(-1, 1)
        if self.__model is None:
            self.__model = KernelDensity(kernel=self.kernel,
                                         bandwidth=self.bandwidth)

        self.__model.fit(column)
        return self

    def density(self):
        """Return ``score_samples`` of the fitted estimator on a grid.

        ``train`` has to be called first; before that no estimator exists.
        """
        # NOTE(review): ``gridsize`` is used as the upper bound of the grid
        # and linspace runs with numpy's default sample count; confirm that
        # ``gridsize`` was not meant to be the number of grid points.
        grid = linspace(0, self.gridsize).reshape(-1, 1)
        return self.__model.score_samples(grid)


class Autoencoder(MachineLearningModel):
    """Keras autoencoder with a single hidden layer.

    ``io_shape`` and ``target_number`` start as None and are used by
    ``train`` as the input/output width and the hidden layer width, so they
    have to be assigned before training.
    """

    def __init__(self):
        self.io_shape = None
        self.target_number = None
        # Filled by ``train``: one array of hidden activations per unit.
        self.targets = None
        self.__model = None
        self.__hidden_outputter = None

    def train(self, data, *args, **kwargs):
        """Build, compile and fit the network on *data*; return ``self``.

        *data* serves as input, as target and as validation data. A second
        model sharing the input and the hidden layer is kept to read the
        hidden activations, which are stored in ``targets`` afterwards.
        """
        inputer = Input(shape=(self.io_shape, ))
        hidden = Dense(units=self.target_number,
                       activation='relu',
                       activity_regularizer=l1(0.01))(inputer)
        outputer = Dense(units=self.io_shape,
                         activation='linear')(hidden)

        self.__model = Model(inputer, outputer)
        self.__model.compile(optimizer='adadelta',
                             loss='mean_squared_error',
                             metrics=['accuracy'])
        self.__hidden_outputter = Model(inputer, hidden)
        self.__model.fit(data,
                         data,
                         epochs=100,
                         batch_size=2,
                         shuffle=False,
                         validation_data=(data, data),
                         verbose=0)
        self._predict_targets(data)
        return self

    def _predict_targets(self, data):
        """Store the hidden activations for *data*, one array per unit."""
        predicts = self.__hidden_outputter.predict(data)
        # layers[1] is presumably the hidden Dense layer (after the input
        # layer); its unit count decides how many columns are split off.
        unit_count = self.__model.layers[1].units
        self.targets = [predicts[:, i] for i in range(unit_count)]

    @property
    def target_error(self):
        """One minus the last recorded ``'accuracy'`` of the training run."""
        return 1 - self.__model.history.history['accuracy'][-1]


def find_relative_extrema(one_dim_data):
    """Locate the relative minima and maxima of *one_dim_data*.

    Returns a pair ``(minima, maxima)``; each element is the result of
    ``scipy.signal.argrelextrema``, i.e. positions within the data rather
    than the data values themselves.
    """
    minima = argrelextrema(one_dim_data, less)
    maxima = argrelextrema(one_dim_data, greater)
    return minima, maxima
diff --git a/cml/ports/scikit_adapter.py b/cml/ports/scikit_adapter.py deleted file mode 100644 index 789b6e0..0000000 --- a/cml/ports/scikit_adapter.py +++ /dev/null @@ -1,86 +0,0 @@ -from collections import Counter -from abc import ABCMeta, abstractmethod - -import sklearn.cluster -from numpy import array, linspace, less, greater -from scipy.signal import argrelextrema -from sklearn.neighbors.kde import KernelDensity - - -# TODO (dmt): Handle algorithms without cluster initialization! -SCIKIT_CLUSTERING_TABLE = { - sklearn.cluster.KMeans: ("n_clusterss", "labels_"), - sklearn.cluster.birch.Birch: ("n_clusters", "labels_"), - sklearn.cluster.SpectralClustering: ("n_clusters", "labels_"), - sklearn.cluster.FeatureAgglomeration: ("n_clusters", "labels_"), - sklearn.cluster.AgglomerativeClustering: ("n_clusters", "labels_") -} - - -class MachineLearningModel: - - @abstractmethod - def train(self, data): - pass - - -class ConstructionClusteringMLModel(MachineLearningModel): - def __init__(self, model): - self.__model = model - self._cluster = 2 - - def get_labels(self): - return self.__model.__getattribute__( - SCIKIT_CLUSTERING_TABLE[type(self.__model)][1] - ) - - @property - def cluster(self): - return self._cluster - - @cluster.setter - def cluster(self, value): - self.__model.__setattr__( - SCIKIT_CLUSTERING_TABLE[type(self.__model)][0], value - ) - - @property - def cluster_sizes(self): - labels = self.__model.__getattribute__( - SCIKIT_CLUSTERING_TABLE[type(self.__model)][1] - ) - return Counter(labels) - - @abstractmethod - def train(self, data): - self.__model.fit(data) - return self - - -class KernelDensityEstimator(MachineLearningModel): - - def __init__(self, kernel="gaussian", bandwidth=3, gridsize=256): - self.__model = None - self.kernel = kernel - self.bandwidth = bandwidth - self.gridsize = gridsize - - def train(self, data): - reshaped_data = array(data).reshape(-1, 1) - if not self.__model: - self.__model = KernelDensity(kernel=self.kernel, - 
bandwidth=self.bandwidth) - - self.__model.fit(reshaped_data) - return self - - def density(self): - grid = linspace(0, self.gridsize) - reshaped_grid = grid.reshape(-1, 1) - return self.__model.score_samples(reshaped_grid) - - -def find_relative_extrema(one_dim_data): - relative_min_values = argrelextrema(one_dim_data, less) - relative_max_values = argrelextrema(one_dim_data, greater) - return relative_min_values, relative_max_values -- GitLab