diff --git a/cml/domain/deconstruction.py b/cml/domain/deconstruction.py index 02d23c89635832cdc84eeb36e80544b098abf680..c22c39bd3a6748710016f3bcd8b848c8c6778fd6 100644 --- a/cml/domain/deconstruction.py +++ b/cml/domain/deconstruction.py @@ -329,38 +329,40 @@ class Deconstructor: success = False if r_model and p_model.tier < self.settings.highest_tier-1: - # Get learnblock that trained relative model second_block = r_model.trained_with(self.source) + overlapping = second_block.new_block_from(block.get_column_values("T")) + + if overlapping.rows >= self.settings.learn_block_minimum: + alpha = self.calculate_reliability(p_model.pre_image_labels, + r_model.pre_image_labels) + alpha_systematic = alpha < 0 + alpha_weak_reliability = 0 <= alpha < self.settings.min_reliability + if (self.settings.allow_weak_reliability and + alpha_weak_reliability) or alpha_systematic: + overlapping_b = block.new_block_from(overlapping.get_column_values("T")) + overblock = self.source.new_learnblock( + values=list(zip( + overlapping.get_column_values("Z"), + overlapping_b.get_column_values("Z"), + overlapping.get_column_values("T"), + ("\"\"" for _ in range(overlapping.rows)), + ("\"\"" for _ in range(overlapping.rows)))), + columns=(p_model.uid, r_model.uid, "T", "Sigma", "Z"), + index=[i for i in range(overlapping.rows)], + origin=[p_model.uid, r_model.uid] + ) - # Get samples that have overlapping timestamp - over_block = block.overlapping_rows(second_block, subset=["T"]) - - # Check rows constraint - if over_block.rows >= self.settings.learn_block_minimum: - - # Calculate reliability (which block???) 
- alpha = self.calc_reliability(r_model, p_model, block) - - #alpha_systematic = alpha < 0 - #alpha_weak_reliability = 0 <= alpha < self.settings.min_reliability - #if (self.settings.allow_weak_reliability and - # alpha_weak_reliability) or alpha_systematic: - - if self.settings.allow_weak_reliability and \ - alpha > self.settings.min_reliability: - - # Create learnblock from the aim values of the overlapping # samples - data = list(zip(over_block.get_column_values("Z"), - over_block.get_column_values("T"), - ["\"\"" for _ in range(over_block.rows)], - ["\"\"" for _ in range(over_block.rows)])) - feature = ".".join(["0", str(tier+1), "1"]) - columns = [feature, "T", "Sigma", "Z"] - source = self.source.new_learnblock( - values=data, columns=columns, index=over_block.indexes, - origin=[p_model.uid, r_model.uid]) - TS_QUEUE.append((tier+1, source)) + # data = list(zip(over_block.get_column_values("Z"), + # over_block.get_column_values("T"), + # ["\"\"" for _ in range(over_block.rows)], + # ["\"\"" for _ in range(over_block.rows)])) + # feature = ".".join(["0", str(tier+1), "1"]) + # columns = [feature, "T", "Sigma", "Z"] + # source = self.source.new_learnblock( + # values=data, columns=columns, index=over_block.indexes, + # origin=[p_model.uid, r_model.uid]) + TS_QUEUE.append((tier+1, overblock)) success = True if not success: @@ -421,24 +423,17 @@ class Deconstructor: # Get learnblock that trained relative model second_block = r_model.trained_with(self.source) - # Get samples that have overlapping rows - overlapping_block = block.overlapping_rows(second_block) + overlapping_block = block.same_features_fusion(second_block) # Check constraint - if overlapping_block.rows >= 2: - + if overlapping_block.n_features >= 2: # Model fusion - new_model = p_model.fusion( - r_model, self.NEXT_MODEL_COUNTER(tier)) + new_model = p_model.fusion(r_model, self.NEXT_MODEL_COUNTER(tier)) which_ml_models = new_model.sigma - - # Get learnblock - train_block = block.fusion(second_block) - 
def calculate_reliability(self, predicts_a, predicts_b):
    """Return Krippendorff's alpha for two sequences of predictions.

    The level of measurement passed to ``krippendorff.alpha`` is chosen
    from ``self.reconstructor.category``: ``"conceptual"`` uses
    ``"nominal"`` and ``"procedural"`` uses ``"ratio"``.

    Args:
        predicts_a: Predictions of the first model.
        predicts_b: Predictions of the second model; passed together with
            ``predicts_a`` as the two rows of the reliability data.

    Returns:
        The value returned by ``krippendorff.alpha``.

    Raises:
        ValueError: If the reconstructor category is neither
            ``"conceptual"`` nor ``"procedural"``.
    """
    category = self.reconstructor.category
    if category == "conceptual":
        level = "nominal"
    elif category == "procedural":
        level = "ratio"
    else:
        # Fail loudly instead of implicitly returning None: the result is
        # compared numerically against reliability thresholds.
        raise ValueError(
            f"unsupported reconstructor category: {category!r}")

    return krippendorff.alpha([predicts_a, predicts_b],
                              level_of_measurement=level)