# Reconstructed from a whitespace-collapsed git patch:
#   commit:  0319d03de04c3c09a0b43134c7c084fefbdae491
#   author:  dmt
#   date:    Tue, 1 Oct 2019 20:53:37 +0200
#   subject: Define the DataSource, Preprocessor and LearnblockIdentifier.
#   target:  cml/domain/data_source.py (new file)
"""Define the DataSource, Preprocessor and LearnblockIdentifier."""

from os.path import commonprefix


__all__ = (
    "DataSource",
    "Preprocessor",
    "LearnblockIdentifier",
)


class DataSource:
    """Bundle a data source with a learnblock identifier.

    Both arguments are stored unchanged as attributes of the same name;
    no validation or conversion happens here.
    """

    def __init__(self, source, learnblock_identifier):
        self.source = source
        self.learnblock_identifier = learnblock_identifier


class Preprocessor:
    """Clean a table in place according to a settings object.

    The settings object must expose ``set_features``, ``set_targets``,
    ``sort_time_stamp`` and ``cut_time_stamp``.  The table must provide
    ``get_columns``, ``get_column_index_by_name``, ``drop_column_by_index``,
    ``set_column_value``, ``sort``, ``get_column_values_as_list`` and
    ``set_column_values``.
    """

    TARGET_COLUMN = "Z"
    TIME_COLUMN = "T"

    def __init__(self, settings):
        self.settings = settings

    def clean(self, table):
        """Run every enabled cleaning step on ``table``.

        Irrelevant columns are always dropped.  The remaining steps are
        applied, in this order, only when the matching setting is truthy:
        overwrite the target column, sort by time stamp, cut the common
        time stamp prefix.  The table is modified in place and nothing is
        returned.
        """
        self._drop_irrelevant_columns(table)

        # ``set_targets`` is both the switch and the value that gets written,
        # so a falsy target value leaves the target column untouched.
        if self.settings.set_targets:
            self._overwrite_target_column(table)

        if self.settings.sort_time_stamp:
            self._sort_according_time_stamp(table)

        if self.settings.cut_time_stamp:
            self._remove_common_time_stamp_prefix(table)

    def _drop_irrelevant_columns(self, table):
        """Drop every column whose index is not listed in ``set_features``.

        The target and time columns are always kept because the later
        cleaning steps operate on them.
        """
        # TODO (dmt): Don't drop the Sigma column either! Its column name is
        # not defined in this module yet.
        protected = (self.TARGET_COLUMN, self.TIME_COLUMN)
        features = self.settings.set_features

        # Resolve all indices before dropping anything: dropping a column
        # may shift the positions of the columns behind it, which would make
        # later lookups disagree with the indices in ``set_features``.
        obsolete = []
        for column in list(table.get_columns()):
            if column in protected:
                continue
            column_index = table.get_column_index_by_name(column)
            if column_index not in features:
                obsolete.append(column_index)

        # Highest index first, so pending indices stay valid after each drop.
        for column_index in sorted(obsolete, reverse=True):
            table.drop_column_by_index(column_index)

    def _overwrite_target_column(self, table):
        """Write ``settings.set_targets`` into the target column."""
        table.set_column_value(self.TARGET_COLUMN, self.settings.set_targets)

    def _sort_according_time_stamp(self, table):
        """Sort the table by its time stamp column."""
        table.sort(self.TIME_COLUMN)

    def _remove_common_time_stamp_prefix(self, table):
        """Strip the prefix shared by all time stamps from each of them."""
        # TODO (dmt): Check if timestamp column is of type string!
        time_column = table.get_column_values_as_list(self.TIME_COLUMN)
        common_prefix = commonprefix(time_column)

        # Slice instead of ``str.lstrip``: ``lstrip`` interprets its argument
        # as a set of characters and would also eat the distinguishing part
        # of a time stamp whenever it consists of characters of the prefix.
        prefix_length = len(common_prefix)
        cleaned_time_column = [s[prefix_length:] for s in time_column]

        # NOTE(review): no column name is passed here, unlike in
        # ``set_column_value`` -- confirm against the table API.
        table.set_column_values(cleaned_time_column)


class LearnblockIdentifier:
    """Hold the settings for learnblock identification.

    Only stores ``settings``; no behaviour is defined yet.
    """

    def __init__(self, settings):
        self.settings = settings