From f1adc193ca15e6aa3a7a1a3bf18abfa5e9239c89 Mon Sep 17 00:00:00 2001
From: dmt <>
Date: Fri, 18 Oct 2019 23:10:17 +0200
Subject: [PATCH] Rename scikit_adapter to ml_adapter and add new model adapters.

---
 cml/ports/ml_adapter.py     | 165 ++++++++++++++++++++++++++++++++++++
 cml/ports/scikit_adapter.py |  86 -------------------
 2 files changed, 165 insertions(+), 86 deletions(-)
 create mode 100644 cml/ports/ml_adapter.py
 delete mode 100644 cml/ports/scikit_adapter.py

diff --git a/cml/ports/ml_adapter.py b/cml/ports/ml_adapter.py
new file mode 100644
index 0000000..6aa0474
--- /dev/null
+++ b/cml/ports/ml_adapter.py
@@ -0,0 +1,165 @@
+from collections import Counter
+from abc import ABCMeta, abstractmethod
+
+from numpy import array, linspace, less, greater, std, argsort
+from scipy.signal import argrelextrema
+import sklearn.cluster
+from sklearn.neighbors import KernelDensity
+from keras.layers import Input, Dense
+from keras.models import Model
+from keras.regularizers import l1
+
+
+# TODO (dmt): Handle algorithms without cluster initialization!
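+# Maps each supported scikit-learn clustering class to the name of its
+# cluster-count constructor parameter and of its fitted-labels attribute.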
+SCIKIT_CLUSTERING_TABLE = {
+    sklearn.cluster.KMeans: ("n_clusters", "labels_"),
+    sklearn.cluster.Birch: ("n_clusters", "labels_"),
+    sklearn.cluster.SpectralClustering: ("n_clusters", "labels_"),
+    sklearn.cluster.FeatureAgglomeration: ("n_clusters", "labels_"),
+    sklearn.cluster.AgglomerativeClustering: ("n_clusters", "labels_")
+}
+
+
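+# Common interface of the model adapters below; subclasses implement train().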
+class MachineLearningModel(metaclass=ABCMeta):
+
+    @abstractmethod
+    def train(self, data, *args, **kwargs):
+        pass
+
+
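+# Wraps a scikit-learn feature selector exposing get_support(); reduce()
+# returns the indices of the features the selector did not keep.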
+class FilterMethod(MachineLearningModel):
+    def __init__(self, model):
+        self.__model = model
+
+    def train(self, data, *args, **kwargs):
+        self.__model = self.__model.fit(data)
+        return self
+
+    def reduce(self, data):
+        feature_count = data.feature_count
+        indices = set(self.__model.get_support(indices=True))
+        return set(range(feature_count)).difference(indices)
+
+
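+# Wraps an estimator exposing feature_importances_, trained against the "Z"
+# column of the data; reduce() flags the least important feature once the
+# importances vary strongly enough.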
+class EmbeddedMethod(MachineLearningModel):
+    _ONE_HUNDRED_PERCENT = 100
+
+    def __init__(self, model):
+        self.__model = model
+
+    def train(self, data, *args, **kwargs):
+        labels = data.get_column_values("Z")
+        self.__model = self.__model.fit(data, labels)
+        return self
+
+    def reduce(self):
+        importance = self.__model.feature_importances_
+        indices = argsort(importance)[::-1]
+        if (self._ONE_HUNDRED_PERCENT*std(importance))//max(importance) >= 10:
+            least_relevant = indices[-1]
+            return {least_relevant}
+        return set()
+
+
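+# Adapter around the clustering algorithms listed in SCIKIT_CLUSTERING_TABLE;
+# exposes the configured cluster count, the fitted labels and cluster sizes.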
+class ConstructionClusteringMLModel(MachineLearningModel):
+    def __init__(self, model):
+        self.__model = model
+        self._cluster = 2
+
+    def get_labels(self):
+        return getattr(self.__model,
+                       SCIKIT_CLUSTERING_TABLE[type(self.__model)][1])
+
+    @property
+    def cluster(self):
+        return self._cluster
+
+    @cluster.setter
+    def cluster(self, value):
+        setattr(self.__model,
+                SCIKIT_CLUSTERING_TABLE[type(self.__model)][0], value)
+        self._cluster = value
+
+    @property
+    def cluster_sizes(self):
+        labels = getattr(self.__model,
+                         SCIKIT_CLUSTERING_TABLE[type(self.__model)][1])
+        return Counter(labels)
+
+    def train(self, data, *args, **kwargs):
+        self.__model.fit(data)
+        return self
+
+
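+# Kernel density estimation based on scikit-learn's KernelDensity; density()
+# scores an evenly spaced grid over [0, gridsize].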
+class KernelDensityEstimator(MachineLearningModel):
+
+    def __init__(self, kernel="gaussian", bandwidth=3, gridsize=256):
+        self.__model = None
+        self.kernel = kernel
+        self.bandwidth = bandwidth
+        self.gridsize = gridsize
+
+    def train(self, data, *args, **kwargs):
+        reshaped_data = array(data).reshape(-1, 1)
+        if not self.__model:
+            self.__model = KernelDensity(kernel=self.kernel,
+                                         bandwidth=self.bandwidth)
+
+        self.__model.fit(reshaped_data)
+        return self
+
+    def density(self):
+        grid = linspace(0, self.gridsize)
+        reshaped_grid = grid.reshape(-1, 1)
+        return self.__model.score_samples(reshaped_grid)
+
+
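+# Single-hidden-layer Keras autoencoder with an L1 activity penalty; the
+# hidden-layer activations after training are stored as the targets.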
+class Autoencoder(MachineLearningModel):
+
+    def __init__(self):
+        self.io_shape = None
+        self.target_number = None
+        self.targets = None
+        self.__model = None
+        self.__hidden_outputter = None
+
+    def train(self, data, *args, **kwargs):
+        inputer = Input(shape=(self.io_shape, ))
+        hidden = Dense(units=self.target_number,
+                       activation='relu',
+                       activity_regularizer=l1(0.01))(inputer)
+        outputer = Dense(units=self.io_shape,
+                         activation='linear')(hidden)
+
+        self.__model = Model(inputer, outputer)
+        self.__model.compile(optimizer='adadelta',
+                             loss='mean_squared_error',
+                             metrics=['accuracy'])
+        self.__hidden_outputter = Model(inputer, hidden)
+        self.__model.fit(data,
+                         data,
+                         epochs=100,
+                         batch_size=2,
+                         shuffle=False,
+                         validation_data=(data, data),
+                         verbose=0)
+        self._predict_targets(data)
+        return self
+
+    def _predict_targets(self, data):
+        predicts = self.__hidden_outputter.predict(data)
+        self.targets = [predicts[:, i]
+                        for i in range(self.__model.layers[1].units)]
+
+    @property
+    def target_error(self):
+        return 1 - self.__model.history.history['accuracy'][-1]
+
+
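+# Indices of the relative minima and maxima of a one-dimensional array.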
+def find_relative_extrema(one_dim_data):
+    relative_min_values = argrelextrema(one_dim_data, less)
+    relative_max_values = argrelextrema(one_dim_data, greater)
+    return relative_min_values, relative_max_values
diff --git a/cml/ports/scikit_adapter.py b/cml/ports/scikit_adapter.py
deleted file mode 100644
index 789b6e0..0000000
--- a/cml/ports/scikit_adapter.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from collections import Counter
-from abc import ABCMeta, abstractmethod
-
-import sklearn.cluster
-from numpy import array, linspace, less, greater
-from scipy.signal import argrelextrema
-from sklearn.neighbors.kde import KernelDensity
-
-
-# TODO (dmt): Handle algorithms without cluster initialization!
-SCIKIT_CLUSTERING_TABLE = {
-    sklearn.cluster.KMeans: ("n_clusterss", "labels_"),
-    sklearn.cluster.birch.Birch: ("n_clusters", "labels_"),
-    sklearn.cluster.SpectralClustering: ("n_clusters", "labels_"),
-    sklearn.cluster.FeatureAgglomeration: ("n_clusters", "labels_"),
-    sklearn.cluster.AgglomerativeClustering: ("n_clusters", "labels_")
-}
-
-
-class MachineLearningModel:
-
-    @abstractmethod
-    def train(self, data):
-        pass
-
-
-class ConstructionClusteringMLModel(MachineLearningModel):
-    def __init__(self, model):
-        self.__model = model
-        self._cluster = 2
-
-    def get_labels(self):
-        return self.__model.__getattribute__(
-            SCIKIT_CLUSTERING_TABLE[type(self.__model)][1]
-        )
-
-    @property
-    def cluster(self):
-        return self._cluster
-
-    @cluster.setter
-    def cluster(self, value):
-        self.__model.__setattr__(
-            SCIKIT_CLUSTERING_TABLE[type(self.__model)][0], value
-        )
-
-    @property
-    def cluster_sizes(self):
-        labels = self.__model.__getattribute__(
-            SCIKIT_CLUSTERING_TABLE[type(self.__model)][1]
-        )
-        return Counter(labels)
-
-    @abstractmethod
-    def train(self, data):
-        self.__model.fit(data)
-        return self
-
-
-class KernelDensityEstimator(MachineLearningModel):
-
-    def __init__(self, kernel="gaussian", bandwidth=3, gridsize=256):
-        self.__model = None
-        self.kernel = kernel
-        self.bandwidth = bandwidth
-        self.gridsize = gridsize
-
-    def train(self, data):
-        reshaped_data = array(data).reshape(-1, 1)
-        if not self.__model:
-            self.__model = KernelDensity(kernel=self.kernel,
-                                         bandwidth=self.bandwidth)
-
-        self.__model.fit(reshaped_data)
-        return self
-
-    def density(self):
-        grid = linspace(0, self.gridsize)
-        reshaped_grid = grid.reshape(-1, 1)
-        return self.__model.score_samples(reshaped_grid)
-
-
-def find_relative_extrema(one_dim_data):
-    relative_min_values = argrelextrema(one_dim_data, less)
-    relative_max_values = argrelextrema(one_dim_data, greater)
-    return relative_min_values, relative_max_values
-- 
GitLab