"""Adapters that hide a concrete tabular data source behind one interface.

Recovered from the patch "Define abstract adapter for all data sources and a
pandas wrapper." (commit ef5714f70a235dcc5577e44c64af6fc926754f37, dmt,
2019-10-01), which creates ``cml/ports/source_adapters.py``.
"""
from abc import abstractmethod, ABC

import pandas as pd


class Adapter(ABC):
    """Abstract interface that every data-source adapter has to implement.

    All methods are abstract; a concrete subclass must override each of them
    before it can be instantiated.
    """

    # NOTE(review): PandasAdapter exposes ``read_csv_data`` (a classmethod),
    # not ``read_csv`` -- confirm which name is the intended contract.
    @abstractmethod
    def read_csv(self):
        """Load the data source from a CSV resource."""

    @abstractmethod
    def sort(self, *args, **kwargs):
        """Sort the underlying data."""

    @abstractmethod
    def set_column_value(self, *args, **kwargs):
        """Assign a value to a column."""

    @abstractmethod
    def set_column_values(self, *args, **kwargs):
        """Assign a collection of values to a column."""

    @abstractmethod
    def get_column_name_by_index(self, *args, **kwargs):
        """Return the name of the column at a given position."""

    # NOTE(review): the name looks like a slip for
    # ``get_column_index_by_name`` (what PandasAdapter implements). It is
    # kept unchanged because renaming an abstract method would break
    # subclasses that already override it.
    @abstractmethod
    def get_column_index_by_index(self, *args, **kwargs):
        """Return the position of a column."""

    @abstractmethod
    def get_columns(self, *args, **kwargs):
        """Return the columns of the data source."""

    # NOTE(review): "colunn" is presumably a typo for "column"
    # (PandasAdapter implements ``drop_column_by_name``). Kept unchanged for
    # the same backward-compatibility reason as above.
    @abstractmethod
    def drop_colunn_by_name(self, *args, **kwargs):
        """Remove a column identified by its name."""

    @abstractmethod
    def get_column_values_as_list(self, *args, **kwargs):
        """Return the values of a column as a list."""


class PandasAdapter:
    """Thin wrapper around a ``pandas.DataFrame``.

    The wrapped frame is modified in place by the mutating methods
    (``drop_*``, ``set_*``, ``sort``), so the caller's frame object changes
    as well.

    NOTE(review): this class does not derive from ``Adapter`` and its method
    names differ from three of the abstract ones (see the notes on
    ``Adapter``); align both before adding the base class.
    """

    def __init__(self, data_frame: pd.DataFrame) -> None:
        """Wrap *data_frame* without copying it."""
        self.__data_frame = data_frame

    @classmethod
    def read_csv_data(cls, path) -> "PandasAdapter":
        """Alternate constructor: read the CSV at *path* via ``pd.read_csv``.

        Returns an instance of ``cls`` so that subclasses get their own type
        back instead of a plain ``PandasAdapter``.
        """
        return cls(pd.read_csv(path))

    def get_column_values(self, column_name):
        """Return the result of selecting *column_name* from the frame."""
        return self.__data_frame[column_name]

    def get_column_values_as_list(self, column_name) -> list:
        """Return the values of *column_name* converted with ``tolist()``."""
        return self.__data_frame[column_name].tolist()

    def get_columns(self) -> pd.Index:
        """Return the frame's column index (``DataFrame.columns``)."""
        return self.__data_frame.columns

    def drop_column_by_index(self, index) -> None:
        """Drop, in place, the column found at position *index*."""
        column = self.get_column_name_by_index(index)
        self.__data_frame.drop(columns=[column], inplace=True)

    def drop_column_by_name(self, name) -> None:
        """Drop, in place, the column called *name*."""
        self.__data_frame.drop(columns=[name], inplace=True)

    def get_column_index_by_name(self, name):
        """Return the position of the column called *name*.

        Raises:
            KeyError: if no column has that name.
        """
        # ``get_loc`` is a method of the column Index, not of the DataFrame;
        # calling it on the frame itself raises AttributeError.
        return self.__data_frame.columns.get_loc(name)

    def get_column_name_by_index(self, index):
        """Return the column label stored at position *index*."""
        return self.__data_frame.columns[index]

    def set_column_value(self, column_name, value) -> None:
        """Assign *value* to *column_name*, creating the column if needed."""
        self.__data_frame[column_name] = value

    def sort(self, column_name, ascending=True) -> None:
        """Sort the rows in place by *column_name*.

        Args:
            column_name: label of the column to sort by.
            ascending: sort direction, forwarded to ``sort_values``.
        """
        self.__data_frame.sort_values(by=[column_name],
                                      ascending=ascending,
                                      inplace=True)

    def set_column_values(self, column, values) -> None:
        """Assign *values* to *column*, creating the column if needed."""
        self.__data_frame[column] = values