from abc import abstractmethod, ABC

import pandas as pd


class Adapter(ABC):
    """Abstract interface listing the operations a data adapter must offer.

    Every method is abstract, so a concrete subclass has to override all of
    them before it can be instantiated. Apart from ``read_csv`` the
    signatures are intentionally open (``*args, **kwargs``); this class does
    not constrain the arguments.

    NOTE(review): ``PandasAdapter`` in this module does not derive from this
    class and does not implement every name listed here (for example
    ``read_csv`` and ``get_block``) -- confirm whether that is intended.
    """

    @abstractmethod
    def read_csv(self):
        """Abstract hook; presumably loads CSV data -- defined by subclasses."""

    @abstractmethod
    def sort(self, *args, **kwargs):
        """Abstract hook for sorting; semantics defined by subclasses."""

    @abstractmethod
    def set_column_value(self, *args, **kwargs):
        """Abstract hook for setting a column value; defined by subclasses."""

    @abstractmethod
    def set_column_values(self, *args, **kwargs):
        """Abstract hook for setting column values; defined by subclasses."""

    @abstractmethod
    def get_column_name_by_index(self, *args, **kwargs):
        """Abstract hook for index -> column name lookup."""

    @abstractmethod
    def get_column_index_by_index(self, *args, **kwargs):
        """Abstract hook; semantics defined by subclasses.

        NOTE(review): the name may have been meant as
        ``get_column_index_by_name`` -- confirm before renaming.
        """

    @abstractmethod
    def get_columns(self, *args, **kwargs):
        """Abstract hook for retrieving the columns."""

    # NOTE(review): "colunn" looks like a typo for "column". The name is kept
    # as is because subclasses must override exactly this abstract name.
    @abstractmethod
    def drop_colunn_by_name(self, *args, **kwargs):
        """Abstract hook for dropping a column by name."""

    @abstractmethod
    def get_column_values_as_list(self, *args, **kwargs):
        """Abstract hook for returning a column's values as a list."""

    @abstractmethod
    def get_block(self, *args, **kwargs):
        """Abstract hook; semantics defined by subclasses."""


# TODO (dmt): Provide common base class or pandas operations.
class PandasBlock:
    """Thin wrapper around a pandas data block (a ``DataFrame``-like object)."""

    def __init__(self, data_block):
        """Store ``data_block``; it is kept by reference, not copied."""
        self.__data_block = data_block

    def __str__(self):
        """Return the string form of the wrapped data block."""
        return str(self.__data_block)

    def get_duplicated_pairs(self, *args):
        """Yield value pairs of rows that duplicate an earlier row.

        Args:
            *args: The first two positional arguments are the names of the
                two columns that together form the duplicate key.

        Yields:
            ``(first_column_value, second_column_value)`` for every row that
            ``duplicated`` flags, i.e. every repeat after the first
            occurrence of a pair.
        """
        first, second = args[0], args[1]
        is_repeat = self.__data_block.duplicated(subset=[first, second])
        repeats = self.__data_block[is_repeat]
        yield from zip(repeats[first], repeats[second])

    def get_values(self, **kwargs):
        """Return the rows matching two of the ``T``/``Z``/``Sigma`` filters.

        Keyword Args:
            T: Value the ``"T"`` column must equal.
            Z: Value the ``"Z"`` column must equal.
            Sigma: Value the ``"Sigma"`` column must equal.

        At least two filters are required. Pairs are tried in the order
        (T, Z), (T, Sigma), (Z, Sigma); if all three are supplied only
        T and Z are applied.

        Returns:
            A new ``PandasBlock`` wrapping the selected rows.

        Raises:
            ValueError: If fewer than two of the filters are supplied.
        """
        t, z, sigma = kwargs.get("T"), kwargs.get("Z"), kwargs.get("Sigma")
        # Compare against None rather than relying on truthiness: 0 and 0.0
        # are legitimate filter values and must not count as "missing".
        has_t = t is not None
        has_z = z is not None
        has_sigma = sigma is not None

        block = self.__data_block
        if has_t and has_z:
            mask = (block["T"] == t) & (block["Z"] == z)
        elif has_t and has_sigma:
            mask = (block["T"] == t) & (block["Sigma"] == sigma)
        elif has_z and has_sigma:
            mask = (block["Z"] == z) & (block["Sigma"] == sigma)
        else:
            raise ValueError(
                "get_values() needs at least two of the keyword arguments "
                "'T', 'Z' and 'Sigma'")
        return PandasBlock(block.loc[mask])

    @property
    def length(self):
        """Number of rows in the wrapped data block (``len`` of it)."""
        return len(self.__data_block)


class PandasAdapter:
    """Wrapper exposing column-oriented helpers on a pandas ``DataFrame``.

    The mutating helpers (``drop_*``, ``set_*``, ``sort``) change the wrapped
    frame in place and return ``None``.
    """

    def __init__(self, data_frame):
        """Store ``data_frame``; it is kept by reference, not copied."""
        self.__data_frame = data_frame

    @classmethod
    def read_csv_data(cls, path):
        """Read the CSV at ``path`` with ``pd.read_csv`` and wrap the result."""
        data_frame = pd.read_csv(path)
        return PandasAdapter(data_frame)

    def get_column_values(self, column_name):
        """Return the column ``column_name`` as selected by ``frame[name]``."""
        return self.__data_frame[column_name]

    def get_column_values_as_list(self, column_name):
        """Return the values of column ``column_name`` as a plain list."""
        return self.__data_frame[column_name].tolist()

    def get_columns(self):
        """Return the frame's ``columns`` attribute."""
        return self.__data_frame.columns

    def drop_column_by_index(self, index):
        """Drop, in place, the column at position ``index``."""
        column = self.get_column_name_by_index(index)
        self.__data_frame.drop(columns=[column], inplace=True)

    def drop_column_by_name(self, name):
        """Drop, in place, the column called ``name``."""
        self.__data_frame.drop(columns=[name], inplace=True)

    def get_column_index_by_name(self, name):
        """Return the position of column ``name`` within the frame's columns.

        The lookup is delegated to ``columns.get_loc``.
        """
        # get_loc is a method of the column Index, not of the DataFrame.
        return self.__data_frame.columns.get_loc(name)

    def get_column_name_by_index(self, index):
        """Return the name of the column at position ``index``."""
        column_names = self.__data_frame.columns
        return column_names[index]

    def set_column_value(self, column_name, value):
        """Assign ``value`` to column ``column_name`` (``frame[name] = value``)."""
        self.__data_frame[column_name] = value

    def sort(self, column_name, ascending=True):
        """Sort the frame in place by ``column_name``.

        Args:
            column_name: Column to sort by.
            ascending: Passed through to ``sort_values``; defaults to ``True``.
        """
        self.__data_frame.sort_values(by=[column_name], ascending=ascending,
                                      inplace=True)

    def set_column_values(self, column, values):
        """Assign ``values`` to column ``column`` (``frame[column] = values``)."""
        self.__data_frame[column] = values