Fix bugs in time/sigma and sigma/zeta deconstructions.

c8e46f2b · dmt · ac5c8316 · c8e46f2b
Commit c8e46f2b authored 5 years ago by dmt
--- a/cml/domain/deconstruction.py
+++ b/cml/domain/deconstruction.py
@@ -329,38 +329,40 @@ class Deconstructor:
        success = False
        if r_model and p_model.tier < self.settings.highest_tier-1:
-            # Get learnblock that trained relative model
            second_block = r_model.trained_with(self.source)
+            overlapping = second_block.new_block_from(block.get_column_values("T"))
+            if overlapping.rows >= self.settings.learn_block_minimum:
+                alpha = self.calculate_reliability(p_model.pre_image_labels,
+                                                   r_model.pre_image_labels)
+                alpha_systematic = alpha < 0
+                alpha_weak_reliability = 0 <= alpha < self.settings.min_reliability
+                if (self.settings.allow_weak_reliability and
+                   alpha_weak_reliability) or alpha_systematic:
+                    overlapping_b = block.new_block_from(overlapping.get_column_values("T"))
+                    overblock = self.source.new_learnblock(
+                        values=list(zip(
+                            overlapping.get_column_values("Z"),
+                            overlapping_b.get_column_values("Z"),
+                            overlapping.get_column_values("T"),
+                            ("\"\"" for _ in range(overlapping.rows)),
+                            ("\"\"" for _ in range(overlapping.rows)))),
+                        columns=(p_model.uid, r_model.uid, "T", "Sigma", "Z"),
+                        index=[i for i in range(overlapping.rows)],
+                        origin=[p_model.uid, r_model.uid]
+                    )
-            # Get samples that have overlapping timestamp
-            over_block = block.overlapping_rows(second_block, subset=["T"])
-            # Check rows constraint
-            if over_block.rows >= self.settings.learn_block_minimum:
-                # Calculate reliability (which block???)
-                alpha = self.calc_reliability(r_model, p_model, block)
-                #alpha_systematic = alpha < 0
-                #alpha_weak_reliability = 0 <= alpha < self.settings.min_reliability
-                #if (self.settings.allow_weak_reliability and
-                #    alpha_weak_reliability) or alpha_systematic:
-                if self.settings.allow_weak_reliability and \
-                        alpha > self.settings.min_reliability:
-                    # Create learnblock from the aim values of the overlapping
                    # samples
-                    data = list(zip(over_block.get_column_values("Z"),
+                    # data = list(zip(over_block.get_column_values("Z"),
-                                    over_block.get_column_values("T"),
+                    #                 over_block.get_column_values("T"),
-                                    ["\"\"" for _ in range(over_block.rows)],
+                    #                 ["\"\"" for _ in range(over_block.rows)],
-                                    ["\"\"" for _ in range(over_block.rows)]))
+                    #                 ["\"\"" for _ in range(over_block.rows)]))
-                    feature = ".".join(["0", str(tier+1), "1"])
+                    # feature = ".".join(["0", str(tier+1), "1"])
-                    columns = [feature, "T", "Sigma", "Z"]
+                    # columns = [feature, "T", "Sigma", "Z"]
-                    source = self.source.new_learnblock(
+                    # source = self.source.new_learnblock(
-                        values=data, columns=columns, index=over_block.indexes,
+                    #     values=data, columns=columns, index=over_block.indexes,
-                        origin=[p_model.uid, r_model.uid])
+                    #     origin=[p_model.uid, r_model.uid])
-                    TS_QUEUE.append((tier+1, source))
+                    TS_QUEUE.append((tier+1, overblock))
                    success = True
        if not success:
@@ -421,24 +423,17 @@ class Deconstructor:
            # Get learnblock that trained relative model
            second_block = r_model.trained_with(self.source)
-            # Get samples that have overlapping rows
+            overlapping_block = block.same_features_fusion(second_block)
-            overlapping_block = block.overlapping_rows(second_block)
            # Check constraint
-            if overlapping_block.rows >= 2:
+            if overlapping_block.n_features >= 2:
                # Model fusion
-                new_model = p_model.fusion(
+                new_model = p_model.fusion(r_model, self.NEXT_MODEL_COUNTER(tier))
-                    r_model, self.NEXT_MODEL_COUNTER(tier))
                which_ml_models = new_model.sigma
-                # Get learnblock
-                train_block = block.fusion(second_block)
                try:
                    # Reconstruct model
                    recon_m = self.reconstructor.reconstruct(
-                        tier, train_block, which_ml_models, new_model)
+                        tier, overlapping_block, which_ml_models, new_model)
                    self.knowledge_database.replace(r_model, recon_m)
                    success = True
@@ -516,7 +511,7 @@ class Deconstructor:
                # Create submodel from TSgima relative samples
                second_block = r_model.trained_with(self.source)
-                new_block = block.fusion(second_block)
+                new_block = block.same_features_fusion(second_block)
                ts_relatives = self.source.time_sigma_relatives(new_block)
                which_ml_models = p_model.subject + r_model.subject
                self.reconstructor.reconstruct(
@@ -527,7 +522,7 @@ class Deconstructor:
            # Create learnblock
            first_block = p_model.trained_with(self.source)
            second_block = r_model.trained_with(self.source)
-            new_block = first_block.fusion(second_block)
+            new_block = first_block.same_features_fusion(second_block)
            which_ml_models = new_model.sigma
            try:
@@ -583,18 +578,25 @@ class Deconstructor:
            set(first_block.columns()).intersection(set(second_block.columns()))
        )
-    def calc_reliability(self,
+    def calculate_reliability(self, predicts_a, predicts_b):
+        """Return Krippendorff's alpha for two sequences of predictions.
+
+        ``predicts_a`` and ``predicts_b`` are handed to
+        ``krippendorff.alpha`` as a two-row list. A "conceptual"
+        reconstructor category uses the nominal level of measurement; any
+        other non-empty category uses the ratio level.
+
+        Raises:
+            ValueError: if the reconstructor category is empty or unset.
+        """
-                         model_a: PragmaticMachineLearningModel,
+        predictions = [predicts_a, predicts_b]
-                         model_b: PragmaticMachineLearningModel,
-                         block):
-        y_one = model_a.model.predict(block.as_numpy_array())
-        y_two = model_b.model.predict(block.as_numpy_array())
-        reliability_data = [y_one, y_two]
        if self.reconstructor.category == "conceptual":
-            return krippendorff.alpha(reliability_data,
+            return krippendorff.alpha(predictions, level_of_measurement="nominal")
-                                      level_of_measurement="nominal")
+        elif self.reconstructor.category:
-        elif self.reconstructor.category == "procedural":
+            return krippendorff.alpha(predictions, level_of_measurement="ratio")
+        # Fail loudly instead of returning None, which callers would then
+        # compare against numeric thresholds.
+        raise ValueError("reconstructor category is not set")
-            return krippendorff.alpha(reliability_data,
+    #
-                                      level_of_measurement="ratio")
+    # def calc_reliability(self,
-        else:
+    #                      model_a: PragmaticMachineLearningModel,
-            raise ValueError()
+    #                      model_b: PragmaticMachineLearningModel,
+    #                      block):
+    #     y_one = model_a.model.predict(block.as_numpy_array())
+    #     y_two = model_b.model.predict(block.as_numpy_array())
+    #     reliability_data = [y_one, y_two]
+    #     if self.reconstructor.category == "conceptual":
+    #         return krippendorff.alpha(reliability_data,
+    #                                   level_of_measurement="nominal")
+    #     elif self.reconstructor.category == "procedural":
+    #         return krippendorff.alpha(reliability_data,
+    #                                   level_of_measurement="ratio")
+    #     else:
+    #         raise ValueError()