Commit d4ffb96e authored by Janos Borst

Merge branch 'dev' into '30-save-and-load-function'

# Conflicts:
#   mlmc/data/__init__.py
parents 50d5ec4a 7e0cdb27
Pipeline #42801 failed with stage in 1 minute and 55 seconds
@@ -15,10 +15,6 @@ import mlmc.models
import mlmc.graph
import mlmc.metrics
import mlmc.representation
import mlmc.experimental
import mlmc.experimental.data
import mlmc.experimental.le
import mlmc.experimental.models
# Save and load models for inference
from .save_and_load import save, load
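# A hedged usage sketch of the new top-level API. Only the names `save` and
# `load` come from the import above; the argument names are assumptions:
#
#   import mlmc
#   mlmc.save(model, "model.bin")     # persist a trained model
#   model = mlmc.load("model.bin")    # restore it for inference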
@@ -180,7 +180,7 @@ class MultiLabelDataset(Dataset):
self.classes = {map.get(k, k): v for k, v in self.classes.items()}
self.y = [[map.get(l, l) for l in labelset] for labelset in self.y]
def reduce(self, subset: dict):
def reduce(self, subset):
"""
Reduces the dataset to a subset of the classes.
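Example (illustrative; `subset` maps the class names to keep onto new indices)::
dataset.reduce({"sports": 0, "politics": 1})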
@@ -244,9 +244,9 @@ class SingleLabelDataset(MultiLabelDataset):
return {'text': self.x[idx], 'labels': torch.tensor(self.classes[self.y[idx][0]])}
class MultiOutputMultiLabelDataset(MultiLabelDataset):
class MultiOutputMultiLabelDataset(Dataset):
def __init__(self, classes, x, y, target_dtype=torch._cast_Float, **kwargs):
super(MultiLabelDataset, self).__init__(**kwargs)
super(MultiOutputMultiLabelDataset, self).__init__(**kwargs)
if isinstance(classes, dict):
self.classes = [classes.copy() for _ in range(len(y[0]))]
else:
@@ -267,13 +267,13 @@ class MultiOutputMultiLabelDataset(MultiLabelDataset):
label_one_hot = [
torch.stack([torch.nn.functional.one_hot(torch.tensor(x[label]), len(x)) for label in labelset], 0) for
x, labelset in zip(self.classes, self.y[item])]
result.update({f"labels_{i}": v for i, v in enumerate(label_one_hot)})
result.update({f"labels_{i}": v.sum(0) for i, v in enumerate(label_one_hot)})
return result
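# The change above sums the stacked one-hot vectors over the label axis, turning
# them into a single multi-hot vector per output. A minimal check:
#
#   oh = torch.stack([torch.nn.functional.one_hot(torch.tensor(i), 4) for i in (1, 3)], 0)
#   oh.sum(0)    # tensor([0, 1, 0, 1]) -- one multi-hot vector over 4 classes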
class MultiOutputSingleLabelDataset(MultiLabelDataset):
def __init__(self, classes, x, y=None, **kwargs):
super(MultiLabelDataset, self).__init__(**kwargs)
class MultiOutputSingleLabelDataset(Dataset):
def __init__(self, classes, x, y=None, **kwargs):
super(MultiOutputSingleLabelDataset, self).__init__(**kwargs)
if y is not None:
if isinstance(classes, dict):
self.classes = [classes.copy() for _ in range(len(y[0]))]
@@ -282,8 +282,12 @@ class MultiOutputSingleLabelDataset(MultiLabelDataset):
assert len(y[0]) == len(self.classes), "Number of labels and number of class dicts do not agree"
assert len(set([len(labelset) for labelset in y])) == 1, \
"Not all instances have the same number of labels."
assert all(len(labelset) == 1 for outputset in y for labelset in outputset), \
"All output sets must be of length 1."
assert len(set([len(labelset) for labelset in y])) == 1, \
"Not all instances have the same number of labels."
self.target_dtype = torch._cast_Float
self.x = x
self.y = y
@@ -293,6 +297,9 @@ class MultiOutputSingleLabelDataset(MultiLabelDataset):
else:
return {'text': self.x[item], 'labels': torch.tensor([d[y[0]] for d, y in zip(self.classes, self.y[item])])}
def __len__(self):
return len(self.x)
def reduce(self, subset):
assert len(subset) == len(self.classes), "Subset and existing classes have varying outputsizes"
assert all([all([x in c.keys() for x in s.keys()]) for s, c in
@@ -322,63 +329,6 @@ class MultiOutputSingleLabelDataset(MultiLabelDataset):
return MultiOutputSingleLabelDataset(x=new_data, y=new_labels, classes=new_classes)
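# A hedged construction sketch for the single-label multi-output case: two
# outputs, exactly one label per output and instance (names are illustrative):
#
#   ds = MultiOutputSingleLabelDataset(
#       classes=[{"pos": 0, "neg": 1}, {"en": 0, "de": 1}],
#       x=["some text"], y=[[["pos"], ["de"]]])
#   ds[0]    # {'text': 'some text', 'labels': tensor([0, 1])}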
from copy import deepcopy
class ZeroshotDataset:
def __init__(self, dataset, zeroshot_classes=None):
if isinstance(dataset, str):
try:
dataset = get_singlelabel_dataset(dataset)
except AssertionError:
dataset = get_multilabel_dataset(dataset)
train = dataset.get("train", None)
valid = dataset.get("valid", None)
test = dataset.get("test", None)
self.zeroshot_classes = zeroshot_classes
self.zeroshot_data = {}
if train is not None:
data = deepcopy(train)
data.remove(zeroshot_classes)
self.zeroshot_data["train"] = data
if valid is not None:
gzsl_data = deepcopy(valid)
gzsl_data.classes = valid.classes
self.zeroshot_data["valid_gzsl"] = gzsl_data
zsl_data = deepcopy(valid)
zsl_data.reduce(dict(zip(self.zeroshot_classes, range(len(zeroshot_classes)))))
self.zeroshot_data["valid_zsl"] = zsl_data
nsl_data = deepcopy(valid)
nsl_data.reduce(self.zeroshot_data["train"].classes)
self.zeroshot_data["valid_nsl"] = nsl_data
if test is not None:
gzsl_data = deepcopy(test)
gzsl_data.classes = test.classes
self.zeroshot_data["test_gzsl"] = gzsl_data
zsl_data = deepcopy(test)
zsl_data.reduce(dict(zip(self.zeroshot_classes, range(len(zeroshot_classes)))))
self.zeroshot_data["test_zsl"] = zsl_data
nsl_data = deepcopy(test)
nsl_data.reduce(self.zeroshot_data["train"].classes)
self.zeroshot_data["test_nsl"] = nsl_data
def get(self, n):
return self.zeroshot_data[n]
def __getitem__(self, item):
return self.zeroshot_data[item]
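# The wrapper materialises three evaluation views per split: "gzsl" keeps all
# classes, "zsl" keeps only the held-out zero-shot classes, and "nsl" keeps only
# the classes seen during training. A hedged usage sketch (class names are
# illustrative):
#
#   zd = ZeroshotDataset(dataset, zeroshot_classes=["GSPO", "GOBIT"])
#   zd["train"], zd["valid_zsl"], zd["test_gzsl"]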
# -------------------------------------------------------------------------------------
@@ -38,7 +38,7 @@ def _save_to_tmp(dataset, data):
if not Path.is_file(CACHE / dataset):
with open(CACHE / dataset, "wb") as f:
pickle.dump(data, f)
return True
return CACHE / dataset
else:
return False
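# With the change above, callers can distinguish a fresh write (the Path of the
# new pickle file) from an existing cache entry (False). A hedged sketch:
#
#   location = _save_to_tmp("my_dataset", data)
#   if location:    # truthy Path: the data was just pickled to that location
#       print(f"cached at {location}")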
from .data import *
from .le import *
from .models import *
\ No newline at end of file
from torch.utils.data import Dataset
import numpy as np
class GraphDataset(Dataset):
def __init__(self, graph, n=2, target=["description", "extract", "label"], **kwargs):
super(GraphDataset, self).__init__(**kwargs)
self.graph = graph.to_undirected(reciprocal=False)
self.nodes = list(graph.nodes)
self.n = n
self.d = dict(self.graph.nodes(True))
self.target=target
def __getitem__(self, idx):
neighbours = list(self.graph.neighbors(self.nodes[idx]))
searching = True
while searching:
neg_idx = np.random.randint(len(self.nodes))
if (self.nodes[neg_idx], self.nodes[idx]) not in self.graph.edges:
searching = False
negative_neighbours = list(self.graph.neighbors(self.nodes[neg_idx]))
random_neighbours = np.random.choice(neighbours, self.n, replace=self.n >= len(neighbours)).tolist()
random_negative_neighbours = [self.nodes[neg_idx]] + np.random.choice(negative_neighbours, self.n, replace=self.n >= len(negative_neighbours)).tolist()
return {'input': self._extract(self.nodes[idx]),
'neighbours':[self._extract(x) for x in random_neighbours],
'negatives': [self._extract(x) for x in random_negative_neighbours]}
def _extract(self, x):
r = x
if self.target == ["label"]:
return x
for t in self.target:
if t in self.d[x].keys():
r += ": " + self.d[x][t]
break
return r
def __len__(self):
return len(self.nodes)
\ No newline at end of file
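# A hedged usage sketch for GraphDataset: each item pairs a node with n of its
# neighbours and with a non-adjacent node plus n of that node's neighbours as
# negatives (the random graph below is illustrative):
#
#   import networkx as nx
#   g = nx.gnp_random_graph(50, 0.2, directed=True)
#   ds = GraphDataset(g, n=2, target=["label"])
#   ds[0]    # {'input': ..., 'neighbours': [...], 'negatives': [...]}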
import torch
class ExpertCoRep(torch.nn.Module):
""" Implementation of the expert system as propsed in http://proceedings.mlr.press/v97/zhang19l/zhang19l.pdf"""
def __init__(self, in_features, out_features, centers,activation=torch.relu, K=10, trainable=False):
super(ExpertCoRep,self).__init__()
self.K = K
self.activation = activation
self.projections = torch.nn.ModuleList(
[torch.nn.Linear(in_features=in_features, out_features=out_features)
for _ in range(K)]
)
self.output_projection = torch.nn.ModuleList(
[torch.nn.Linear(in_features=out_features, out_features=out_features)
for _ in range(K)]
)
assert len(centers.shape) == 1 or centers.shape[0] == K, \
"Centers must either be one-dimensional or have a first dimension equal to the number of experts K"
if len(centers.shape) == 1:
self.single_center = True
self.centers = torch.nn.Parameter(centers[None])
elif centers.shape[0] == K:
self.single_center = False
self.centers = torch.nn.Parameter(centers)
self.centers.requires_grad = trainable
def expert(self, x, i):
if self.single_center:
return self.activation(self.projections[i](x-self.centers))
else:
return self.activation(self.projections[i](x-self.centers[i]))
def forward(self,x):
return torch.stack([self.expert(x,i) for i in range(self.K)]).sum(0)
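# Each expert i computes activation(W_i(x - c_i)) and the forward pass sums the
# K expert outputs; note that `output_projection` is registered but unused in
# forward. A hedged shape check (dimensions are illustrative):
#
#   layer = ExpertCoRep(in_features=300, out_features=128, centers=torch.zeros(300), K=4)
#   layer(torch.rand(8, 300)).shape    # torch.Size([8, 128])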
import torch
class DistanceAbstract(torch.nn.Module):
def __init__(self, distances=["cosine", "p1"], y=None):
super(DistanceAbstract, self).__init__()
self.distances = distances
if y is not None:
self.set_y(y)
def _cov(self, m):
m_exp = torch.mean(m.transpose(-1,-2), dim=-1)
x = m.transpose(-1,-2) - m_exp.unsqueeze(-1)
if len(x.shape)==2:
cov = x.mm(x.transpose(-1,-2)) / (x.size(1) - 1)
else:
cov = torch.bmm(x,(x.transpose(-1,-2))) / (x.size(1) - 1)
return cov
def _p1(self, x, y):
return (x - y).norm(p=1, dim=-1) / x.shape[-1]
def _p2(self, x, y):
return (x - y).norm(p=2, dim=-1) / x.shape[-1]
def _p3(self, x, y):
return (x - y).norm(p=3, dim=-1) / x.shape[-1]
def _p4(self, x, y):
return (x - y).norm(p=4, dim=-1) / x.shape[-1]
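# Note: despite its name, _jsd computes the asymmetric KL divergence
# KL(softmax(x) || softmax(y)) rather than the symmetric Jensen-Shannon divergence.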
def _jsd(self, x, y):
xsf = torch.softmax(x, -1)
return (xsf * torch.log(xsf / torch.softmax(y, -1))).sum(-1)
def _scalar(self, x, y):
return (x * y).sum(-1)
def forward(self,x, y=None):
xtmp, ytmp = self._reshape(x, self.y if y is None else y)
if y is not None and "mahalanobis" in self.distances:
self.cov = self._cov(y)
return torch.stack([getattr(self,"_"+s)(xtmp,ytmp) for s in self.distances],-1)
def set_y(self, y):
self.y = y
if "mahalanobis" in self.distances and y is not None:
self.cov = self._cov(y)
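# Subclasses implement _reshape(x, y) plus one "_<name>" method per entry of
# `distances`; forward dispatches via getattr. A minimal hypothetical subclass:
#
#   class PairwiseP2(DistanceAbstract):
#       def _reshape(self, x, y):
#           return x, y    # assumes x and y already share a shape
#
#   PairwiseP2(distances=["p2"], y=torch.rand(5, 3))(torch.rand(5, 3)).shape    # (5, 1)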
#
#
# class DistanceAll(DistanceAbstract):
# """ Implementation of the expert system as propsed in http://proceedings.mlr.press/v97/zhang19l/zhang19l.pdf"""
#
# def __init__(self, **kwargs):
# super(DistanceAll,self).__init__(**kwargs)
#
# def _cosine(self, x, y):
# if self.batch_broadcast:
# return torch.matmul((x[:,0]/x[:,0].norm(dim=-1, p=2, keepdim=True)), (y[0,:,0]/y[0,:,0].norm(dim=-1, p=2, keepdim=True)).t()).transpose(1,2)
# else:
# return torch.bmm(x[:,0].squeeze(-1)/x[:,0].squeeze(-1).norm(dim=-1, p=2, keepdim=True), (y[:,:,0].squeeze(-1)/y[:,:,0].squeeze(-1).norm(dim=-1, p=2, keepdim=True)).transpose(1,2)).transpose(1,2)
#
#
# def _mahalanobis(self,x,y):
# diff = (x-y)
# if len(self.cov.shape) == 2:
# return torch.sqrt((torch.matmul(diff, self.cov.transpose(-1,-2)) * diff).sum(-1)).squeeze(-1)
# elif len(self.cov.shape) == 3:
# return torch.sqrt((torch.einsum("ijkl,iml->ijkm",diff, self.cov.transpose(-1,-2)) * diff).sum(-1)).squeeze(-1)
#
# def _lrd(self, x , y):
# dist1 = self._p2(x,y)
# if self.batch_broadcast:
# dist2 = self._p2(y[0],y[:,:,0])
# else:
# dist2 = self._p2(y,y[:,None][:,:,:,0])
#
# dist2 = (dist2 + dist2.max()*torch.eye(dist2.shape[-1],dist2.shape[-1])).min(-1)[0]
# return torch.sqrt((1-dist1/dist2.unsqueeze(-1))**2)
#
# def _reshape(self, x,y):
# assert not ( y.shape[0] == 1 and x.shape != 1), "This is ambiguous. [y].shape[0] can't be 1"
#
# if len(x.shape) == len(y.shape):
# ytmp = y
# self.batch_broadcast = False
# else:
# ytmp = y[None]
# self.batch_broadcast = True
#
# xtmp = x[:, None]
# ytmp = ytmp[:, :, None]
# # print(xtmp.shape, ytmp.shape)
# return xtmp, ytmp
class DistanceCorrespondence(DistanceAbstract):
"""implementation of various comparison metrics"""
def __init__(self, **kwargs):
super(DistanceCorrespondence,self).__init__(**kwargs)
def _cosine(self, x, y):
r = (x / x.norm(p=2, dim=-1, keepdim=True) * (y / y.norm(p=2, dim=-1, keepdim=True))[None]).sum(-1)
return r
def _p1(self, x, y):
return -(x - y[None]).norm(p=1, dim=-1)
def _p2(self, x, y):
return -(x - y[None]).norm(p=2, dim=-1)
def _p3(self, x, y):
return -(x - y[None]).norm(p=3, dim=-1)
def _p4(self, x, y):
return -(x - y[None]).norm(p=4, dim=-1)
def _jsd(self, x, y):
xsf = torch.softmax(x, -1)
return 1-(xsf * torch.log(xsf / torch.softmax(y, -1)[ None])).sum(-1)
def _scalar(self, x, y):
return (x* y[None]).sum(-1)
def _mahalanobis(self, x, y):
diff = (x- y[None])
return torch.sqrt((torch.matmul(diff, self.cov.transpose(-1, -2)) * diff).sum(-1)).squeeze(-1)
def _lrd(self, x, y):
dist1 = self._p2(x, y)
dist2 = (y[None] - y[:, None]).norm(p=2, dim=-1)
dist2 = (dist2 + dist2.max() * torch.eye(dist2.shape[-1], dist2.shape[-1]).to(x.device)).min(-1)[0]
return torch.sqrt((1 - dist1 / dist2[None]) ** 2)
def _reshape(self, x, y):
assert x.shape[1] == y.shape[0], "x.shape[1] and y.shape[0] must agree (one y row per x position)"
return x, y
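# A hedged shape sketch: DistanceCorrespondence scores each batch vector against
# the y row at the same position (correspondence, not all pairs):
#
#   dc = DistanceCorrespondence(distances=["cosine", "p2"])
#   dc(torch.rand(8, 5, 300), torch.rand(5, 300)).shape    # torch.Size([8, 5, 2])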
class Distance(DistanceAbstract):
def __init__(self, **kwargs):
super(Distance,self).__init__(**kwargs)
def _cosine(self, x, y):
r = torch.matmul(x/x.norm(p=2, dim=-1, keepdim=True),(y/ y.norm(p=2, dim=-1, keepdim=True)).t())
return r
def _p1(self, x, y):
return (x[:,:,None]- y[None]).norm(p=1, dim=-1)
def _p2(self, x, y):
return (x[:,:,None]- y[None]).norm(p=2, dim=-1)
def _p3(self, x, y):
return (x[:,:,None]- y[None]).norm(p=3, dim=-1)
def _p4(self, x, y):
return (x[:,:,None] - y[None]).norm(p=4, dim=-1)
def _jsd(self, x, y):
xsf = torch.softmax(x, -1)
return (xsf [:,:,None] * torch.log(xsf[:,:,None] / torch.softmax(y, -1)[None,None])).sum(-1)
def _scalar(self, x, y):
return torch.matmul(x, y.t())
def _mahalanobis(self, x, y):
diff = (x[:,:,None] - y[None,None])
return torch.sqrt((torch.matmul(diff, self.cov.transpose(-1, -2)) * diff).sum(-1)).squeeze(-1)
def _min_distances(self, y):
if y.shape[0] > 1000:
batch_size = 100
dis = []
for b in range(0, y.shape[0], batch_size):
batch = y[b:(b + batch_size)]
batch_distances = (batch[None] - y[:, None]).norm(p=2, dim=-1)
batch_distances[batch_distances==0] = batch_distances.max()
batch_distances_min = batch_distances.min(0)[0]
dis.append(batch_distances_min)
return torch.cat(dis, 0)
else:
dist2 = (y[None] - y[:, None]).norm(p=2, dim=-1)
dist2 = (dist2 + dist2.max() * torch.eye(dist2.shape[-1], dist2.shape[-1]).to(dist2.device)).min(-1)[0]
return dist2
def _lrd(self, x, y):
dist1 = self._p2(x, y)
return torch.sqrt((1 - dist1 / self.min_distances[None,None]) ** 2)
def _reshape(self, x,y):
assert len(y.shape)==2, "y has to be 2D"
return x, y
def forward(self,x, y=None):
if y is not None:
self.set_y(y)
xtmp, ytmp = self._reshape(x, self.y)
return torch.stack([getattr(self,"_"+s)(xtmp,ytmp) for s in self.distances],-1)
def set_y(self, y):
self.y = y
if "mahalanobis" in self.distances and y is not None:
self.cov = self._cov(y)
if "lrd" in self.distances:
self.min_distances = self._min_distances(y)
class Similarity(DistanceAbstract):
def __init__(self, fuzzyness=1.0, **kwargs):
super(Similarity,self).__init__(**kwargs)
self.fuzzyness = fuzzyness
def _cosine(self, x, y):
r = torch.matmul(x/x.norm(p=2, dim=-1, keepdim=True),(y/ y.norm(p=2, dim=-1, keepdim=True)).t())
return r
def _p1(self, x, y):
return torch.exp(-self.fuzzyness*(x[:,:,None]- y[None]).norm(p=1, dim=-1))
def _p2(self, x, y):
return torch.exp(-self.fuzzyness*(x[:,:,None]- y[None]).norm(p=2, dim=-1))
def _p3(self, x, y):
return torch.exp(-self.fuzzyness*(x[:,:,None]- y[None]).norm(p=3, dim=-1))
def _p4(self, x, y):
return torch.exp(-self.fuzzyness*(x[:,:,None] - y[None]).norm(p=4, dim=-1))
def _jsd(self, x, y):
xsf = torch.softmax(x, -1)
return torch.exp(-self.fuzzyness* (xsf [:,:,None] * torch.log(xsf[:,:,None] / torch.softmax(y, -1)[None,None])).sum(-1))
def _scalar(self, x, y):
return torch.softmax(torch.matmul(x, y.t()),-2)
def _mahalanobis(self, x, y):
diff = (x[:,:,None] - y[None,None])
return torch.exp(-self.fuzzyness*torch.sqrt((torch.matmul(diff, self.cov.transpose(-1, -2)) * diff).sum(-1)).squeeze(-1))
def _min_distances(self, y):
if y.shape[0] > 1000:
batch_size = 100
dis = []
for b in range(0, y.shape[0], batch_size):
batch = y[b:(b + batch_size)]
batch_distances = (batch[None] - y[:, None]).norm(p=2, dim=-1)
batch_distances[batch_distances==0] = batch_distances.max()
batch_distances_min = batch_distances.min(0)[0]
dis.append(batch_distances_min)
return torch.cat(dis, 0)
else:
dist2 = (y[None] - y[:, None]).norm(p=2, dim=-1)
dist2 = (dist2 + dist2.max() * torch.eye(dist2.shape[-1], dist2.shape[-1]).to(dist2.device)).min(-1)[0]
return dist2
def _lrd(self, x, y):
dist1 = self._p2(x, y)
return torch.exp(-self.fuzzyness*(1 - dist1 / self.min_distances[None,None]) ** 2)
def _reshape(self, x,y):
assert len(y.shape)==2, "y has to be 2D"
return x, y
def forward(self,x, y=None):
if y is not None:
self.set_y(y)
xtmp, ytmp = self._reshape(x, self.y)
return torch.stack([getattr(self,"_"+s)(xtmp,ytmp) for s in self.distances],-1)
def set_y(self, y):
self.y = y
if "mahalanobis" in self.distances and y is not None:
self.cov = self._cov(y)
if "lrd" in self.distances:
self.min_distances = self._min_distances(y)
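# Similarity mirrors Distance but maps most distances through
# exp(-fuzzyness * d), so values lie in (0, 1] and grow as vectors get closer.
# A hedged shape sketch:
#
#   sim = Similarity(fuzzyness=0.5, distances=["p2"])
#   sim(torch.rand(2, 10, 300), torch.rand(7, 300)).shape    # torch.Size([2, 10, 7, 1])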
# Example usage (shapes): batched queries scored against a fixed support set.
#
# d = Distance(distances=["lrd", "p1", "p2", "p3", "cosine", "mahalanobis", "scalar", "jsd"])
# x = torch.rand(2, 140, 300)
# y = torch.rand(2000, 300)
# d.set_y(y)
# print(d(x).shape)    # torch.Size([2, 140, 2000, 8])
from .CoRepresentation import *
from .Distances import *
from .lstm import *
from .metric_layers import *
from .weighted_aggregation import *
\ No newline at end of file
import torch
import torch.nn as nn
from torch.nn.utils.rnn import PackedSequence
from typing import *
class VariationalDropout(nn.Module):
"""
Applies the same dropout mask across the temporal dimension
See https://arxiv.org/abs/1512.05287 for more details.
Note that this is not applied to the recurrent activations in the LSTM like the above paper.
Instead, it is applied to the inputs and outputs of the recurrent layer.
"""