From a7eb5383a65fb1d1074283d44fe36417f84cbab7 Mon Sep 17 00:00:00 2001
From: dmt <>
Date: Fri, 18 Oct 2019 23:14:43 +0200
Subject: [PATCH] Define helper methods needed in construction and feature selection.

---
Review notes (not part of the commit message):
- The line structure of this patch was restored from a copy whose line
  breaks had been collapsed. Hunk sizes match the @@ headers, but the
  indentation of context lines is inferred and may need adjusting before
  the patch applies cleanly.
- get_column_name_by_index read `self.__data_block.column`, while every
  other method touched here reads the labels through `columns`; the
  lookup now uses `columns` as well. Hunk sizes and the diffstat are
  unchanged by this one-token fix. The `index` line below still names
  the blobs of the unrevised patch.

 cml/ports/source_adapters.py | 42 +++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/cml/ports/source_adapters.py b/cml/ports/source_adapters.py
index 76d1272..038b6c9 100644
--- a/cml/ports/source_adapters.py
+++ b/cml/ports/source_adapters.py
@@ -43,20 +43,44 @@ class PandasBlock:
     def __init__(self, data_block, relatives=None):
         self.__data_block = data_block
         self.relatives = relatives
+        self.n_cluster = None
 
     def __str__(self):
         return str(self.__data_block)
 
+    def __repr__(self):
+        return str(
+            self.__data_block[
+                self.__data_block.columns[:self.rows-self._LAST_THREE_COLUMNS]])
+
     def __getitem__(self, item):
         return self.__data_block.iloc[item][:self.rows-self._LAST_THREE_COLUMNS]
 
     def __len__(self):
         return self.__data_block.shape[0]
 
+    def as_numpy_array(self):
+        return self.__data_block[
+            self.__data_block.columns[
+                :self.rows - self._LAST_THREE_COLUMNS]].values
+
     def set_labels(self, labels):
-        data_frame = self.__data_block["Z"] = labels
+        data_frame = self.__data_block.copy()
+        data_frame["Z"] = labels
         return PandasBlock(data_frame, self.relatives)
 
+    @property
+    def min_timestamp(self):
+        return min(self.__data_block["T"])
+
+    @property
+    def max_timestamp(self):
+        return max(self.__data_block["T"])
+
+    @property
+    def learn_rows(self):
+        return self.__data_block.shape[1] - 3
+
     @property
     def rows(self):
         return self.__data_block.shape[1]
@@ -110,6 +134,22 @@ class PandasBlock:
     def get_column_values(self, column_name):
         return self.__data_block[column_name]
 
+    def get_column_name_by_index(self, index):
+        column_names = self.__data_block.columns
+        return column_names[index]
+
+    @property
+    def feature_count(self):
+        return self.__data_block.shape[1] - self._LAST_THREE_COLUMNS
+
+    def drop_columns_by_index(self, index):
+        if isinstance(index, set):
+            remove_columns = [self.__data_block.columns[i] for i in index]
+        else:
+            remove_columns = index
+
+        self.__data_block.drop(remove_columns, axis=1, inplace=True)
+
 
 class PandasAdapter:
     def __init__(self, data_frame):
-- 
GitLab