Newer
Older
from abc import abstractmethod, ABC
import pandas as pd
class Adapter(ABC):
    """Abstract interface for adapters around a tabular data source.

    Every method declared here is abstract, so the class cannot be
    instantiated directly; a concrete adapter has to override all of
    them. The signatures are deliberately open (``*args, **kwargs``),
    the exact parameters are left to the implementation.
    """

    @abstractmethod
    def read_csv(self):
        """Abstract hook; presumably loads the data from a CSV source."""

    @abstractmethod
    def sort(self, *args, **kwargs):
        """Abstract hook; presumably sorts the underlying data."""

    @abstractmethod
    def set_column_value(self, *args, **kwargs):
        """Abstract hook; presumably assigns one value to a column."""

    @abstractmethod
    def set_column_values(self, *args, **kwargs):
        """Abstract hook; presumably assigns a sequence of values to a column."""

    @abstractmethod
    def get_column_name_by_index(self, *args, **kwargs):
        """Abstract hook; presumably maps a column position to its name."""

    # NOTE(review): "index_by_index" looks like a slip for "index_by_name"
    # (PandasAdapter offers get_column_index_by_name) -- confirm before
    # renaming, since subclasses may already implement this spelling.
    @abstractmethod
    def get_column_index_by_index(self, *args, **kwargs):
        """Abstract hook; intent unclear from the name, see review note."""

    @abstractmethod
    def get_columns(self, *args, **kwargs):
        """Abstract hook; presumably returns the available columns."""

    # NOTE(review): "colunn" is probably a typo for "column"
    # (PandasAdapter implements drop_column_by_name) -- confirm before
    # renaming, since subclasses may already implement this spelling.
    @abstractmethod
    def drop_colunn_by_name(self, *args, **kwargs):
        """Abstract hook; presumably removes the column with a given name."""

    @abstractmethod
    def get_column_values_as_list(self, *args, **kwargs):
        """Abstract hook; presumably returns a column's values as a list."""

    @abstractmethod
    def get_block(self, *args, **kwargs):
        """Abstract hook; presumably returns a sub-range of the data."""
# TODO (dmt): Provide common base class for pandas operations.
class PandasBlock:
    """Thin wrapper around a block of rows held in a pandas object.

    The wrapped object is expected to behave like a ``pandas.DataFrame``:
    the methods below rely on ``duplicated``, ``loc`` and column access
    by name.
    """

    def __init__(self, data_block):
        """Keep a reference to ``data_block``; no copy is made."""
        self.__data_block = data_block

    def __str__(self):
        """Return the string form of the wrapped data."""
        return str(self.__data_block)

    def get_duplicated_pairs(self, *args):
        """Yield the value pairs of rows duplicated in two columns.

        Args:
            *args: Column names; only the first two are used.

        Yields:
            ``(value_of_first_column, value_of_second_column)`` for each
            row that ``DataFrame.duplicated`` flags for that column pair
            (with pandas defaults: every repeat after the first
            occurrence).

        Raises:
            IndexError: On first iteration, if fewer than two column
                names were supplied.
        """
        first, second = args[0], args[1]
        is_repeat = self.__data_block.duplicated(subset=[first, second])
        repeated_rows = self.__data_block[is_repeat]
        yield from zip(repeated_rows[first], repeated_rows[second])

    def get_values(self, **kwargs):
        """Return the rows matching two of the columns "T", "Z", "Sigma".

        Args:
            **kwargs: Filter values keyed by ``T``, ``Z`` and/or
                ``Sigma``. At least two must be given. If all three are
                given, only ``T`` and ``Z`` are applied (the first
                matching pair wins: T/Z, then T/Sigma, then Z/Sigma).

        Returns:
            A new ``PandasBlock`` wrapping the matching rows.

        Raises:
            ValueError: If fewer than two of the filters are supplied.
        """
        t, z, sigma = kwargs.get("T"), kwargs.get("Z"), kwargs.get("Sigma")
        # Compare against None rather than relying on truthiness, so that
        # legitimate filter values such as 0 or 0.0 are not mistaken for
        # "not supplied".
        has_t = t is not None
        has_z = z is not None
        has_sigma = sigma is not None

        block = self.__data_block
        if has_t and has_z:
            mask = (block["T"] == t) & (block["Z"] == z)
        elif has_t and has_sigma:
            mask = (block["T"] == t) & (block["Sigma"] == sigma)
        elif has_z and has_sigma:
            mask = (block["Z"] == z) & (block["Sigma"] == sigma)
        else:
            raise ValueError(
                "get_values() needs at least two of the keyword "
                "arguments 'T', 'Z' and 'Sigma'")
        return PandasBlock(block.loc[mask])

    @property
    def length(self):
        """Number of rows in the wrapped data (``len`` of the block)."""
        return len(self.__data_block)
class PandasAdapter:
    """Adapter exposing a small, named API over a pandas ``DataFrame``.

    The adapter keeps a reference to the frame it is given. The mutating
    methods (``drop_*``, ``sort``, ``set_*``) operate on that frame in
    place, so the caller's object changes as well.
    """

    def __init__(self, data_frame):
        """Wrap ``data_frame``; no copy is made."""
        self.__data_frame = data_frame

    @classmethod
    def read_csv_data(cls, path):
        """Build an adapter from a CSV file.

        Args:
            path: Anything ``pandas.read_csv`` accepts as its first
                argument.

        Returns:
            An instance of ``cls`` wrapping the parsed frame, so
            subclasses get an instance of their own type.
        """
        # Use cls rather than the hard-coded class name so the alternate
        # constructor works for subclasses too.
        return cls(pd.read_csv(path))

    @property
    def length(self):
        """Number of rows in the wrapped frame."""
        return len(self.__data_frame)

    def get_block(self, start, end=None, step=None):
        """Return the row slice ``[start:end:step]`` as a ``PandasBlock``."""
        return PandasBlock(self.__data_frame[start:end:step])

    def get_column_values(self, column_name):
        """Return the column ``column_name`` as pandas returns it."""
        return self.__data_frame[column_name]

    def get_column_values_as_list(self, column_name):
        """Return the values of ``column_name`` as a plain list."""
        return self.__data_frame[column_name].tolist()

    def get_columns(self):
        """Return the column labels as a list."""
        return list(self.__data_frame.columns)

    def drop_column_by_index(self, index):
        """Remove, in place, the column at position ``index``."""
        column = self.get_column_name_by_index(index)
        self.__data_frame.drop(columns=[column], inplace=True)

    def drop_column_by_name(self, name):
        """Remove, in place, the column labelled ``name``."""
        self.__data_frame.drop(columns=[name], inplace=True)

    def get_column_index_by_name(self, name):
        """Return the position of the column labelled ``name``."""
        return self.__data_frame.columns.get_loc(name)

    def get_column_name_by_index(self, index):
        """Return the label of the column at position ``index``."""
        return self.__data_frame.columns[index]

    def set_column_value(self, column_name, value):
        """Assign ``value`` to column ``column_name`` (created if absent)."""
        self.__data_frame[column_name] = value

    def sort(self, column_name, ascending=True):
        """Sort the rows in place by ``column_name``.

        Args:
            column_name: Label of the column to sort by.
            ascending: Sort direction, forwarded to ``sort_values``.
        """
        self.__data_frame.sort_values(
            by=[column_name], ascending=ascending, inplace=True)

    def set_column_values(self, column, values):
        """Assign ``values`` to column ``column`` (created if absent)."""
        self.__data_frame[column] = values