From 4a5cd05bac1c715afe19de36a9963ff457f5aef0 Mon Sep 17 00:00:00 2001
From: dmt <>
Date: Thu, 3 Oct 2019 19:03:56 +0200
Subject: [PATCH] Define wrapper around pandas that represents a block of a
 dataframe with additional methods for finding duplicate value pairs and
 getting all these duplicates.

---
 cml/ports/source_adapters.py | 46 ++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/cml/ports/source_adapters.py b/cml/ports/source_adapters.py
index 7b33f85..073601f 100644
--- a/cml/ports/source_adapters.py
+++ b/cml/ports/source_adapters.py
@@ -32,6 +32,52 @@ class Adapter(ABC):
     @abstractmethod
     def get_column_values_as_list(self, *args, **kwargs): pass
 
+    @abstractmethod
+    def get_block(self, *args, **kwargs): pass  # Subclasses must override.
+
+
+# TODO (dmt): Provide a common base class for pandas operations.
+
+class PandasBlock:
+    """Wrap a pandas DataFrame block with duplicate and filter helpers."""
+
+    def __init__(self, data_block):
+        self.__data_block = data_block
+
+    def __str__(self):
+        return str(self.__data_block)
+
+    def get_duplicated_pairs(self, *args):
+        """Yield (args[0], args[1]) column values of each repeated row.
+
+        Rows count as repeated when both columns match an earlier row, so
+        the first occurrence of every pair is not yielded.
+        """
+        first, second = args[0], args[1]
+        repeated = self.__data_block.duplicated(subset=[first, second])
+        rows = self.__data_block[repeated]
+        yield from zip(rows[first], rows[second])
+
+    def get_values(self, **kwargs):
+        """Return a PandasBlock of rows matching two of T, Z and Sigma.
+
+        The first two keywords given, in the order T, Z, Sigma, select the
+        rows by equality. Raises ValueError when fewer than two are given.
+        """
+        # Compare against None so falsy values such as 0 stay usable.
+        given = [(key, kwargs[key]) for key in ("T", "Z", "Sigma")
+                 if kwargs.get(key) is not None]
+        if len(given) < 2:
+            raise ValueError("get_values needs two of: T, Z, Sigma")
+        (col_a, val_a), (col_b, val_b) = given[:2]
+        block = self.__data_block
+        mask = (block[col_a] == val_a) & (block[col_b] == val_b)
+        return PandasBlock(block.loc[mask])
+
+    @property
+    def length(self):
+        return len(self.__data_block)
+
 
 class PandasAdapter:
     def __init__(self, data_frame):
-- 
GitLab