Skip to content
Snippets Groups Projects
Commit 9bb65612 authored by dmt
Browse files

Refactor data source.

parent 5f9d463b
No related branches found
No related tags found
No related merge requests found
......@@ -61,9 +61,14 @@ class DataSource:
@property
@log_learnblock_processing
def learnblocks(self):
    """Yield the learnblocks that can be identified in the data source.

    Every block obtained by iterating over this data source is handed to
    the learnblock identifier. Each truthy result gets its ``origin`` set
    to ``"source"``, its indexes are passed to
    ``_flip_source_halde_flags`` and it is then yielded.

    Raises:
        ValueError: If the configured block size is larger than the
            length of the data source. The check runs when the
            generator body starts executing.
    """
    if self.settings.block_size > len(self):
        # Adjacent string literals are joined verbatim, so the separating
        # space has to be part of one of them.
        raise ValueError("Block size cannot be larger than the size "
                         "of the data source.")
    for block in self:
        learnblock = self.__learnblock_identifier.identify(block)
        if learnblock:
            learnblock.origin = "source"
            self._flip_source_halde_flags(learnblock.indexes)
            yield learnblock
......@@ -78,7 +83,11 @@ class DataSource:
halde_runs = -1
for i in cycle(range(0, len(self))):
if counter == self.block_size:
if halde_runs >= self.settings.stack_iterations:
# manually stop generator
return
if counter == self.settings.block_size:
old_index = i
counter = 0
yield self.__source.get_block_via_index(block_indexes)
......@@ -108,15 +117,33 @@ class DataSource:
def __len__(self):
    """Return the ``length`` reported by the wrapped source."""
    source = self.__source
    return source.length
def get_block(self, indices=None, columns=None):
    """Fetch a block from the wrapped source.

    Args:
        indices: Forwarded unchanged as the first argument of the
            source's ``get_block_via_index``. Defaults to ``None``.
        columns: Forwarded as the ``columns`` keyword argument.
            Defaults to ``None``, so callers that only pass
            ``indices`` keep working.

    Returns:
        Whatever ``get_block_via_index`` of the source returns.
    """
    # Single definition only: a second ``def get_block`` in the same scope
    # would silently shadow the first one.
    return self.__source.get_block_via_index(indices, columns=columns)
def time_sigma_relatives(self, block):
    """Return the first relatives found for the ("T", "Sigma") column pair.

    Delegates to the learnblock identifier's ``_identify_relatives`` and
    takes only the first item it produces. ``StopIteration`` propagates
    if nothing is produced.
    """
    identifier = self.__learnblock_identifier
    candidates = identifier._identify_relatives(block, "T", "Sigma")
    return next(iter(candidates))
def estimate_density(self, data):
    """Estimate the density of ``data``.

    Trains the learnblock identifier's density estimator on ``data`` and
    returns the result of calling ``density()`` on the trained estimator.
    This is the same ``train(...).density()`` sequence that
    ``LearnblockIdentifier._identify_relatives`` uses.

    Args:
        data: Values passed unchanged to the estimator's ``train``.

    Returns:
        The value returned by ``density()`` of the trained estimator.
    """
    # No early return before this point: a leftover
    # ``return ...density_estimator(data).density()`` would make the
    # training step below unreachable.
    kernel_density_estimator = self.__learnblock_identifier.\
        density_estimator.train(data)
    return kernel_density_estimator.density()
def remove_time_dense_relatives(self, block, density):
    """Forward to the identifier's ``_remove_time_dense_relatives``.

    ``block`` and ``density`` are passed through unchanged. Nothing is
    returned.
    """
    identifier = self.__learnblock_identifier
    identifier._remove_time_dense_relatives(block, density)
def cluster(self, block, density):
    """Return the identifier's ``_cluster_sigma_zeta_relatives`` result.

    ``block`` and ``density`` are passed through unchanged.
    """
    identifier = self.__learnblock_identifier
    clusters = identifier._cluster_sigma_zeta_relatives(block, density)
    return clusters
def new_learnblock(self, values, columns, index, origin):
    """Create a block through the wrapped source's ``new_block``.

    All four arguments are passed on positionally, in the order given.
    """
    source = self.__source
    return source.new_block(values, columns, index, origin)
def get_time_values(self, indices):
    """Return the "T" column for ``indices`` as a NumPy array.

    Asks the wrapped source for the block at ``indices`` restricted to
    the column "T" and converts it with ``as_numpy_array()``.
    """
    time_block = self.__source.get_block_via_index(indices, columns="T")
    return time_block.as_numpy_array()
class Preprocessor:
......@@ -165,15 +192,20 @@ class Preprocessor:
class LearnblockIdentifier:
def __init__(self, settings, density_estimator, relative_extrema):
    """Store the collaborators used for learnblock identification.

    Args:
        settings: Kept as ``self.settings``.
        density_estimator: Kept as ``self.density_estimator``.
        relative_extrema: Kept as ``self._relative_extrema``.
    """
    self.settings = settings
    # Column pairings that are searched for relatives.
    self.column_pairs = (
        ("T", "Z"),
        ("T", "Sigma"),
        ("Sigma", "Z"),
    )
    self.density_estimator = density_estimator
    self._relative_extrema = relative_extrema
@classmethod
def _column_pairs(cls):
    """Yield the column pairs ("T", "Z"), ("T", "Sigma"), ("Sigma", "Z")."""
    ordered_pairs = (("T", "Z"), ("T", "Sigma"), ("Sigma", "Z"))
    for pair in ordered_pairs:
        yield pair
def identify(self, block):
biggest_learn_block = None
biggest_block_size = 0
for pair in self.column_pairs:
for pair in self._column_pairs():
for possible_learnblock in self._identify_relatives(block, *pair):
if self._is_learn_block(possible_learnblock.length):
if possible_learnblock.length > biggest_block_size:
......@@ -209,7 +241,7 @@ class LearnblockIdentifier:
time_column = relatives.get_column_values("T")
density = self.density_estimator.train(time_column).density()
self._remove_time_dense_relatives(relatives, density)
clusters = self._cluster_sigma_zeta_realtives(relatives, density)
clusters = self._cluster_sigma_zeta_relatives(relatives, density)
for time_values in clusters:
yield relatives.new_block_from(time_values)
......@@ -219,7 +251,7 @@ class LearnblockIdentifier:
if dens > max_dens*(self.settings.sigma_zeta_cutoff/100):
block.drop_row(index)
def _cluster_sigma_zeta_realtives(self, cutted_block, density):
def _cluster_sigma_zeta_relatives(self, cutted_block, density):
# TODO (dmt): Don't rely on data series from pandas, because ckmeans
# needs primitive data types.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment