Skip to content
Snippets Groups Projects
Commit 0319d03d authored by dmt's avatar dmt
Browse files

Define the DataSource, Preprocessor and LearnblockIdentifier.

parent 72a917b8
No related branches found
No related tags found
No related merge requests found
from os.path import commonprefix
# Public API of this module: the names exported by a star-import.
__all__ = (
    "DataSource",
    "Preprocessor",
    "LearnblockIdentifier",
)
class DataSource:
    """Bundle a data source with a learnblock identifier.

    Both collaborators are stored exactly as passed in; no validation,
    copying or I/O happens at construction time.
    """

    def __init__(self, source, learnblock_identifier):
        """Store the collaborators on the instance.

        Args:
            source: The underlying source object (kept unchanged; its
                type is not constrained here).
            learnblock_identifier: Kept unchanged; presumably a
                ``LearnblockIdentifier`` instance -- TODO confirm
                against callers.
        """
        self.source = source
        self.learnblock_identifier = learnblock_identifier
class Preprocessor:
    """Clean a table in place according to user settings.

    The ``settings`` object is read for the attributes ``set_features``,
    ``set_targets``, ``sort_time_stamp`` and ``cut_time_stamp``.  The table
    passed to :meth:`clean` must provide ``get_columns``,
    ``get_column_index_by_name``, ``drop_column_by_index``,
    ``set_column_value``, ``sort``, ``get_column_values_as_list`` and
    ``set_column_values``.
    """

    # Name of the column holding the target values.
    TARGET_COLUMN = "Z"
    # Name of the column holding the time stamps.
    TIME_COLUMN = "T"

    def __init__(self, settings):
        """Remember the settings that steer every cleaning step."""
        self.settings = settings

    def clean(self, table):
        """Run all enabled cleaning steps on ``table`` (mutates it).

        Irrelevant columns are always dropped first; the remaining steps
        run only when the corresponding setting is truthy.
        """
        self._drop_irrelevant_columns(table)

        if self.settings.set_targets:
            self._overwrite_target_column(table)

        if self.settings.sort_time_stamp:
            self._sort_according_time_stamp(table)

        if self.settings.cut_time_stamp:
            self._remove_common_time_stamp_prefix(table)

    def _drop_irrelevant_columns(self, table):
        """Drop every column whose index is not in ``settings.set_features``.

        The target and time columns are never dropped because the later
        cleaning steps operate on them.
        """
        # TODO (dmt): Don't drop the Sigma column either!
        protected = (self.TARGET_COLUMN, self.TIME_COLUMN)
        wanted = self.settings.set_features

        # Resolve every index against the untouched table first: dropping
        # while iterating would skip columns and shift the indices that
        # ``set_features`` refers to.
        indices_to_drop = []
        for column in list(table.get_columns()):
            if column in protected:
                continue
            column_index = table.get_column_index_by_name(column)
            if column_index not in wanted:
                indices_to_drop.append(column_index)

        # Highest index first, so a drop never moves a column that is
        # still waiting to be dropped.
        for column_index in sorted(indices_to_drop, reverse=True):
            table.drop_column_by_index(column_index)

    def _overwrite_target_column(self, table):
        """Set the target column to ``settings.set_targets``."""
        table.set_column_value(self.TARGET_COLUMN, self.settings.set_targets)

    def _sort_according_time_stamp(self, table):
        """Sort the table by its time stamp column."""
        table.sort(self.TIME_COLUMN)

    def _remove_common_time_stamp_prefix(self, table):
        """Strip the prefix shared by all time stamps from each of them."""
        # TODO (dmt): Check if timestamp column is of type string!
        time_column = table.get_column_values_as_list(self.TIME_COLUMN)
        common_prefix = commonprefix(time_column)

        # Slice by length instead of ``str.lstrip``: ``lstrip`` treats its
        # argument as a set of characters and would also eat leading
        # characters that merely occur somewhere in the prefix.
        prefix_length = len(common_prefix)
        cleaned_time_column = [s[prefix_length:] for s in time_column]

        # NOTE(review): no column name is passed here, unlike
        # ``set_column_value`` above -- confirm against the table API that
        # this really writes the time stamp column.
        table.set_column_values(cleaned_time_column)
class LearnblockIdentifier:
    """Holder for the settings used to identify learnblocks.

    Only construction is defined here; the settings object is stored
    unchanged.
    """

    def __init__(self, settings):
        """Store ``settings`` on the instance without inspecting it."""
        self.settings = settings
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment