Skip to content
Snippets Groups Projects
Commit f1adc193 authored by dmt's avatar dmt
Browse files

Rename scikit_adapter to ml_adapter.

parent a0eab2f7
No related branches found
No related tags found
No related merge requests found
from collections import Counter
from abc import ABCMeta, abstractmethod
import sklearn.cluster
from numpy import array, linspace, less, greater
from numpy import array, linspace, less, greater, std, argsort
from scipy.signal import argrelextrema
import sklearn.cluster
from sklearn.neighbors.kde import KernelDensity
from keras.layers import Input, Dense
from keras.models import Model
from keras.regularizers import l1
# TODO (dmt): Handle algorithms without cluster initialization!
SCIKIT_CLUSTERING_TABLE = {
sklearn.cluster.KMeans: ("n_clusterss", "labels_"),
sklearn.cluster.KMeans: ("n_clusters", "labels_"),
sklearn.cluster.birch.Birch: ("n_clusters", "labels_"),
sklearn.cluster.SpectralClustering: ("n_clusters", "labels_"),
sklearn.cluster.FeatureAgglomeration: ("n_clusters", "labels_"),
......@@ -20,10 +23,44 @@ SCIKIT_CLUSTERING_TABLE = {
class MachineLearningModel:
    """Base class of the model adapters defined in this module.

    NOTE(review): the class does not declare ``ABCMeta`` as its metaclass,
    so ``@abstractmethod`` only marks ``train`` and does not prevent
    instantiation -- confirm whether enforcement is wanted.
    """

    @abstractmethod
    def train(self, data, *args, **kwargs):
        """Train the model on ``data``.

        The base implementation does nothing and returns ``None``; any
        extra positional or keyword arguments are accepted and ignored.
        """
        pass
class FilterMethod(MachineLearningModel):
    """Feature reduction driven by a wrapped selector model.

    The wrapped ``model`` is used through ``fit(data)`` and
    ``get_support(indices=True)``.
    """

    def __init__(self, model):
        """Store the selector ``model`` that will be fitted by ``train``."""
        self.__model = model

    def train(self, data, *args, **kwargs):
        """Fit the wrapped model on ``data`` and return this adapter.

        The object returned by ``fit`` replaces the stored model. Extra
        positional and keyword arguments are ignored.
        """
        fitted = self.__model.fit(data)
        self.__model = fitted
        return self

    def reduce(self, data):
        """Return the feature indices the wrapped model does not report.

        The result is the set of integers in ``range(data.feature_count)``
        that are absent from ``get_support(indices=True)``.
        """
        total = data.feature_count
        supported = set(self.__model.get_support(indices=True))
        every_index = set(range(total))
        return every_index - supported
class EmbeddedMethod(MachineLearningModel):
    """Feature reduction based on a model's ``feature_importances_``.

    The wrapped ``model`` is used through ``fit(data, labels)`` and its
    ``feature_importances_`` attribute.
    """

    # Factor that turns the std/max ratio into a percentage.
    _ONE_HUNDRET_PERCENT = 100
    # Minimum (floored) percentage at which a feature is proposed for removal.
    _SPREAD_THRESHOLD_PERCENT = 10

    def __init__(self, model):
        """Store the ``model`` that will be fitted by ``train``."""
        self.__model = model

    def train(self, data, *args, **kwargs):
        """Fit the wrapped model on ``data`` and return this adapter.

        The labels are read from ``data.get_column_values("Z")``. The object
        returned by ``fit`` replaces the stored model. Extra positional and
        keyword arguments are ignored.
        """
        labels = data.get_column_values("Z")
        self.__model = self.__model.fit(data, labels)
        return self

    def reduce(self):
        """Return the index of the least important feature, if any.

        The standard deviation of the importances is expressed as a floored
        percentage of the largest importance. When that percentage reaches
        the threshold, a one-element set holding the index of the smallest
        importance is returned; otherwise an empty set is returned.
        """
        importance = self.__model.feature_importances_
        spread = (
            self._ONE_HUNDRET_PERCENT * std(importance)
        ) // max(importance)
        if spread >= self._SPREAD_THRESHOLD_PERCENT:
            # First entry of the ascending argsort is the smallest importance.
            least_relevant = argsort(importance)[0]
            return {least_relevant}
        # An empty set (not ``{}``, which is a dict) keeps the return type
        # the same in both branches.
        return set()
class ConstructionClusteringMLModel(MachineLearningModel):
def __init__(self, model):
self.__model = model
......@@ -43,6 +80,7 @@ class ConstructionClusteringMLModel(MachineLearningModel):
self.__model.__setattr__(
SCIKIT_CLUSTERING_TABLE[type(self.__model)][0], value
)
self._cluster = value
@property
def cluster_sizes(self):
......@@ -51,8 +89,7 @@ class ConstructionClusteringMLModel(MachineLearningModel):
)
return Counter(labels)
def train(self, data, *args, **kwargs):
    """Fit the wrapped clustering model on ``data`` and return this adapter.

    Extra positional and keyword arguments are accepted and ignored. The
    method is a concrete implementation and is therefore not marked as
    abstract.
    """
    self.__model.fit(data)
    return self
......@@ -65,7 +102,7 @@ class KernelDensityEstimator(MachineLearningModel):
self.bandwidth = bandwidth
self.gridsize = gridsize
def train(self, data):
def train(self, data, *args, **kwargs):
reshaped_data = array(data).reshape(-1, 1)
if not self.__model:
self.__model = KernelDensity(kernel=self.kernel,
......@@ -80,6 +117,48 @@ class KernelDensityEstimator(MachineLearningModel):
return self.__model.score_samples(reshaped_grid)
class Autoencoder(MachineLearningModel):
    """Keras autoencoder with one hidden layer.

    ``io_shape`` and ``target_number`` start as ``None`` and are read by
    ``train``; nothing in this class assigns them, so the caller has to set
    them first. After training, ``targets`` holds one array per hidden unit.
    """

    def __init__(self):
        """Create an untrained autoencoder with no configuration set."""
        self.io_shape = None
        self.target_number = None
        self.targets = None
        self.__model = None
        self.__hidden_outputter = None

    def train(self, data, *args, **kwargs):
        """Build, compile and fit the network on ``data``; return ``self``.

        ``data`` is used as input, as reconstruction target and as the
        validation set. Extra positional and keyword arguments are ignored.
        """
        width = self.io_shape
        source = Input(shape=(width, ))
        encoder_layer = Dense(units=self.target_number,
                              activation='relu',
                              activity_regularizer=l1(0.01))
        encoded = encoder_layer(source)
        decoder_layer = Dense(units=width, activation='linear')
        reconstructed = decoder_layer(encoded)

        self.__model = autoencoder = Model(source, reconstructed)
        autoencoder.compile(optimizer='adadelta',
                            loss='mean_squared_error',
                            metrics=['accuracy'])
        # Second model sharing the same layers, used to read the hidden
        # activations once training is done.
        self.__hidden_outputter = Model(source, encoded)

        fit_options = dict(epochs=100,
                           batch_size=2,
                           shuffle=False,
                           validation_data=(data, data),
                           verbose=0)
        autoencoder.fit(data, data, **fit_options)
        self._predict_targets(data)
        return self

    def _predict_targets(self, data):
        """Store the hidden-layer output for ``data``, one column per unit."""
        activations = self.__hidden_outputter.predict(data)
        # ``layers[1]`` is presumably the hidden Dense layer built in
        # ``train`` -- its ``units`` gives the number of columns to split.
        unit_count = self.__model.layers[1].units
        columns = []
        for unit in range(unit_count):
            columns.append(activations[:, unit])
        self.targets = columns

    @property
    def target_error(self):
        """One minus the last recorded ``'accuracy'`` value of the fit."""
        accuracy_per_epoch = self.__model.history.history['accuracy']
        return 1 - accuracy_per_epoch[-1]
def find_relative_extrema(one_dim_data):
relative_min_values = argrelextrema(one_dim_data, less)
relative_max_values = argrelextrema(one_dim_data, greater)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment