From f342a4f6b65116f69e3be99eea561a95e4b18c4a Mon Sep 17 00:00:00 2001
From: dmt <>
Date: Thu, 3 Oct 2019 19:06:53 +0200
Subject: [PATCH] Identify blocks that satisfy the criteria for being a
 learning block.

---
 cml/domain/data_source.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/cml/domain/data_source.py b/cml/domain/data_source.py
index c438572..d9e6242 100644
--- a/cml/domain/data_source.py
+++ b/cml/domain/data_source.py
@@ -57,3 +57,22 @@ class Preprocessor:
 class LearnblockIdentifier:
     def __init__(self, settings):
         self.settings = settings
+        self.column_pairs = (("T", "Z"), ("T", "Sigma"), ("Sigma", "Z"))  # pairs iterated by identify()
+
+    def identify(self, block):
+        """Yield candidates of block whose length passes _is_learn_block."""
+        for pair in self.column_pairs:
+            groups = self._identify_relatives(block, *pair)
+            yield from (g for g in groups if self._is_learn_block(g.length))
+
+    def _is_learn_block(self, block_length):
+        return self.settings.learn_block_minimum < block_length  # strict bound
+
+    def _identify_relatives(self, block, *args):
+        """Yield block.get_values(...) once per distinct duplicated pair."""
+        # TODO (dmt): Implement density and kmeans!
+        visited = set()
+        for pair in block.get_duplicated_pairs(args[0], args[1]):
+            if pair not in visited:
+                visited.add(pair)
+                yield block.get_values(**{args[0]: pair[0], args[1]: pair[1]})
-- 
GitLab