# Reconstructed from a one-line git patch against cml/ports/source_adapters.py.
# The patch also adds an abstract ``get_block`` to ``Adapter``; that class is
# only partially visible in the patch, so it is not reproduced here.

# TODO (dmt): Provide common base class or pandas operations.


class PandasBlock:
    """Wrap a block of a pandas data frame.

    Adds helpers for finding duplicated value pairs and for selecting the
    rows that match a pair of column values.
    """

    def __init__(self, data_block):
        """Store *data_block*, the wrapped data frame."""
        self.__data_block = data_block

    def __str__(self):
        """Return the string form of the wrapped data frame."""
        return str(self.__data_block)

    def get_duplicated_pairs(self, *args):
        """Yield the value pairs of rows flagged as duplicates.

        ``args[0]`` and ``args[1]`` name the two columns that are compared.
        Rows are flagged with ``DataFrame.duplicated`` using its default
        settings, and one ``(first, second)`` pair is yielded per flagged row.

        This is a generator, so an ``IndexError`` for fewer than two column
        names surfaces on the first iteration, not at call time.
        """
        first_column, second_column = args[0], args[1]
        flagged = self.__data_block.duplicated(
            subset=[first_column, second_column])

        duplicates = self.__data_block[flagged]
        yield from zip(duplicates[first_column], duplicates[second_column])

    def get_values(self, **kwargs):
        """Return a new ``PandasBlock`` with the rows matching two columns.

        Recognised keyword arguments are ``T``, ``Z`` and ``Sigma``. The
        first available pair in the order (T, Z), (T, Sigma), (Z, Sigma) is
        used; if all three are given, ``Sigma`` is ignored.

        Raises:
            ValueError: if fewer than two of the keywords are supplied.
        """
        t, z, sigma = kwargs.get("T"), kwargs.get("Z"), kwargs.get("Sigma")
        block = self.__data_block

        # Compare against None instead of relying on truthiness, so that
        # legitimate falsy filter values such as 0 or 0.0 are honoured.
        has_t = t is not None
        has_z = z is not None
        has_sigma = sigma is not None

        if has_t and has_z:
            mask = (block["T"] == t) & (block["Z"] == z)
        elif has_t and has_sigma:
            mask = (block["T"] == t) & (block["Sigma"] == sigma)
        elif has_z and has_sigma:
            mask = (block["Z"] == z) & (block["Sigma"] == sigma)
        else:
            # ValueError is a subclass of Exception, so callers that catch
            # the generic Exception raised previously keep working.
            raise ValueError(
                "get_values() needs at least two of the keyword arguments "
                "'T', 'Z' and 'Sigma'.")

        return PandasBlock(block.loc[mask])

    @property
    def length(self):
        """Number of rows in the wrapped data frame."""
        return len(self.__data_block)