Newer
Older
from abc import abstractmethod, ABC
import pandas as pd
class Adapter(ABC):
    """Abstract interface for tabular-data adapters.

    Every method decorated with ``@abstractmethod`` must be overridden
    before a subclass can be instantiated. Most signatures are left open
    (``*args, **kwargs``); each implementation decides which arguments it
    accepts and what it returns.
    """

    @abstractmethod
    def read_csv(self):
        """Abstract hook for reading CSV data; behavior is implementation-defined."""

    @abstractmethod
    def sort(self, *args, **kwargs):
        """Abstract hook for sorting; arguments are implementation-defined."""

    @abstractmethod
    def set_column_value(self, *args, **kwargs):
        """Abstract hook for setting a column value; arguments are implementation-defined."""

    @abstractmethod
    def set_column_values(self, *args, **kwargs):
        """Abstract hook for setting column values; arguments are implementation-defined."""

    @abstractmethod
    def get_column_name_by_index(self, *args, **kwargs):
        """Abstract hook for resolving a column name from an index."""

    # NOTE(review): the name reads like it may have been meant as
    # ``get_column_index_by_name`` -- confirm before renaming; kept as-is so
    # existing subclasses stay valid.
    @abstractmethod
    def get_column_index_by_index(self, *args, **kwargs):
        """Abstract hook; arguments and result are implementation-defined."""

    @abstractmethod
    def get_columns(self, *args, **kwargs):
        """Abstract hook for retrieving the columns."""

    @abstractmethod
    def drop_colunn_by_name(self, *args, **kwargs):
        """Abstract hook for dropping a column by name (misspelled name).

        The misspelling ("colunn") is kept, and kept abstract, so subclasses
        that already override this name continue to work. New callers should
        use ``drop_column_by_name``.
        """

    def drop_column_by_name(self, *args, **kwargs):
        """Correctly spelled entry point for dropping a column by name.

        Forwards all arguments to ``drop_colunn_by_name`` and returns its
        result. Subclasses may override this method directly.
        """
        return self.drop_colunn_by_name(*args, **kwargs)

    @abstractmethod
    def get_column_values_as_list(self, *args, **kwargs):
        """Abstract hook for retrieving a column's values as a list."""

    @abstractmethod
    def get_block(self, *args, **kwargs):
        """Abstract hook for retrieving a block; arguments are implementation-defined."""
# TODO (dmt): Provide common base class for pandas operations.
class PandasBlock:
    """Wrapper around a pandas DataFrame holding a block of rows."""

    def __init__(self, data_block):
        """Store ``data_block`` (the wrapped DataFrame) without copying it."""
        self.__data_block = data_block

    def __str__(self):
        """Return the string form of the wrapped data."""
        return str(self.__data_block)

    def get_duplicated_pairs(self, *args):
        """Yield the value pairs of rows that duplicate an earlier row.

        Args:
            *args: The first two positional arguments are the column names
                used as the duplicate-detection subset; extras are ignored.

        Yields:
            ``(first_column_value, second_column_value)`` for every row that
            ``DataFrame.duplicated`` flags over those two columns.
        """
        first, second = args[0], args[1]
        bool_series = self.__data_block.duplicated(subset=[first, second])
        duplicates = self.__data_block[bool_series]
        yield from zip(duplicates[first], duplicates[second])

    def get_values(self, **kwargs):
        """Return the rows matching two of the ``T``, ``Z``, ``Sigma`` filters.

        Args:
            **kwargs: Any of ``T``, ``Z`` and ``Sigma``. At least two must be
                given. Precedence is (T, Z), then (T, Sigma), then
                (Z, Sigma); when all three are given ``Sigma`` is ignored.

        Returns:
            A new ``PandasBlock`` wrapping the rows where both selected
            columns equal the requested values.

        Raises:
            ValueError: If fewer than two of the filters are supplied.
        """
        block = self.__data_block
        t, z, sigma = kwargs.get("T"), kwargs.get("Z"), kwargs.get("Sigma")
        # Compare against None instead of relying on truthiness so that a
        # legitimate filter value of 0 is not mistaken for "not provided".
        has_t = t is not None
        has_z = z is not None
        has_sigma = sigma is not None

        if has_t and has_z:
            mask = (block["T"] == t) & (block["Z"] == z)
        elif has_t and has_sigma:
            mask = (block["T"] == t) & (block["Sigma"] == sigma)
        elif has_z and has_sigma:
            mask = (block["Z"] == z) & (block["Sigma"] == sigma)
        else:
            raise ValueError(
                "get_values() requires at least two of the keyword "
                "arguments 'T', 'Z' and 'Sigma'")
        return PandasBlock(block.loc[mask])

    @property
    def length(self):
        """Number of rows in the wrapped data."""
        return len(self.__data_block)
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class PandasAdapter:
    """Wrapper exposing a small set of operations on a pandas DataFrame.

    The wrapped DataFrame is stored by reference; the drop and sort methods
    modify it in place.
    """

    def __init__(self, data_frame):
        """Store ``data_frame`` without copying it."""
        self.__data_frame = data_frame

    @classmethod
    def read_csv_data(cls, path):
        """Build an adapter from the CSV at ``path`` via ``pandas.read_csv``.

        Uses ``cls`` so that subclasses get an instance of their own type.
        """
        return cls(pd.read_csv(path))

    def get_column_values(self, column_name):
        """Return the column ``column_name`` as selected from the DataFrame."""
        return self.__data_frame[column_name]

    def get_column_values_as_list(self, column_name):
        """Return the values of column ``column_name`` as a list."""
        return self.__data_frame[column_name].tolist()

    def get_columns(self):
        """Return the DataFrame's ``columns`` index."""
        return self.__data_frame.columns

    def drop_column_by_index(self, index):
        """Drop, in place, the column at position ``index``."""
        column = self.get_column_name_by_index(index)
        self.__data_frame.drop(columns=[column], inplace=True)

    def drop_column_by_name(self, name):
        """Drop, in place, the column called ``name``."""
        self.__data_frame.drop(columns=[name], inplace=True)

    def get_column_index_by_name(self, name):
        """Return the position of column ``name``.

        The lookup goes through ``DataFrame.columns`` because ``get_loc`` is
        a method of the column index, not of the DataFrame itself.
        """
        return self.__data_frame.columns.get_loc(name)

    def get_column_name_by_index(self, index):
        """Return the name of the column at position ``index``."""
        return self.__data_frame.columns[index]

    def set_column_value(self, column_name, value):
        """Assign ``value`` to column ``column_name``, creating it if needed."""
        self.__data_frame[column_name] = value

    def sort(self, column_name, ascending=True):
        """Sort the rows in place by ``column_name``.

        Args:
            column_name: Column to sort by.
            ascending: Sort direction, forwarded to ``sort_values``.
        """
        self.__data_frame.sort_values(
            by=[column_name], ascending=ascending, inplace=True)

    def set_column_values(self, column, values):
        """Assign ``values`` to column ``column``, creating it if needed."""
        self.__data_frame[column] = values