From 0289a47a9587911900abcb2061ef26b1b09c2112 Mon Sep 17 00:00:00 2001
From: dmt <>
Date: Thu, 10 Oct 2019 17:55:07 +0200
Subject: [PATCH] Implement the sequence protocol in PandasBlock.

---
 cml/ports/source_adapters.py | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/cml/ports/source_adapters.py b/cml/ports/source_adapters.py
index d769fbc..a183a87 100644
--- a/cml/ports/source_adapters.py
+++ b/cml/ports/source_adapters.py
@@ -37,14 +37,35 @@ class Adapter(ABC):
 
 
 # TODO (dmt): Provide common base class or pandas operations.
-
 class PandasBlock:
-    def __init__(self, data_block):
+    _LAST_THREE_COLUMNS = 3
+
+    def __init__(self, data_block, relatives=None):
         self.__data_block = data_block
 
     def __str__(self):
         return str(self.__data_block)
 
+    def __getitem__(self, item):
+        return self.__data_block.iloc[item][:self.rows-self._LAST_THREE_COLUMNS]
+
+    def __len__(self):
+        return self.__data_block.shape[0]
+
+    def set_labels(self, labels):
+        data_frame = self.__data_block["Z"] = labels
+        return PandasBlock(data_frame, self.relatives)
+
+    @property
+    def rows(self):
+        return self.__data_block.shape[1]
+
+    def new_block_from(self, column_values):
+        data_from = self.__data_block.loc[self.__data_block["T"].isin(
+            column_values)]
+
+        return PandasBlock(data_from)
+
     def get_duplicated_pairs(self, *args):
         bool_series = self.__data_block.duplicated(subset=[args[0], args[1]])
 
-- 
GitLab