Commit 79a80278 authored by Janos Borst's avatar Janos Borst

EndOfWork

parent f9f13487
......@@ -2,6 +2,7 @@ from .probability import Prob
from .lstm import LSTM, LSTMRD
from .label_layers import LabelEmbeddingScoring,AdaptiveCombination,LabelAttention, LabelSpecificSelfAttention
from .metric_layers import Bilinear, Metric
from .weighted_aggregation import AttentionWeightedAggregation
# Make pytorch_geometric effectively optional
try:
    from .graph import GatedGraphConv
......
import torch


class AttentionWeightedAggregation(torch.nn.Module):
    """
    Aggregate a tensor along its sequence dimension, weighted by a self-attention
    distribution (structured self-attention, https://arxiv.org/pdf/1703.03130.pdf).
    """
    def __init__(self, in_features, d_a):
        super(AttentionWeightedAggregation, self).__init__()
        self.in_features = in_features
        self.d_a = d_a
        # Project the input into the attention space before scoring each position.
        self.att_projection = torch.nn.Linear(in_features=self.in_features, out_features=self.d_a, bias=False)
        self.att_weights = torch.nn.Parameter(torch.zeros((self.d_a,)))
        torch.nn.init.normal_(self.att_weights, mean=0.0, std=0.001)

    def forward(self, x, aggr=None, return_att=True):
        # Attention over the sequence dimension: (batch, seq_len)
        attention = torch.softmax(torch.matmul(torch.tanh(self.att_projection(x)), self.att_weights), -1)
        if aggr is None:
            # Weighted sum of the input itself.
            output = (attention[:, :, None] * x).sum(-2)
        else:
            # Weighted sum of an alternative tensor (e.g. per-token label scores).
            output = (attention[:, :, None] * aggr).sum(-2)
        if return_att:
            return output, attention
        return output
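A minimal usage sketch for the new layer (not part of the commit; the dimensions and the mlmc.layers import path are assumptions for illustration):

import torch
from mlmc.layers import AttentionWeightedAggregation  # assumed package path

layer = AttentionWeightedAggregation(in_features=768, d_a=1024)
x = torch.randn(8, 300, 768)                  # (batch, seq_len, hidden)
pooled, att = layer(x)                        # pooled: (8, 768), att: (8, 300)
scores = torch.randn(8, 300, 20)              # e.g. per-token scores for 20 labels
label_logits, att = layer(x, aggr=scores)     # label_logits: (8, 20)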
......@@ -6,7 +6,7 @@ from .abstracts import TextClassificationAbstract
from ..representation import get, is_transformer
import re
from ..representation.labels import makemultilabels
from ..layers import Bilinear
from ..layers import Bilinear, AttentionWeightedAggregation
from ignite.metrics import Average
from tqdm import tqdm
......@@ -139,3 +139,56 @@ class BertAsConcept2(TextClassificationAbstract):
print("Labels:\t", label)
print("Concepts:\t", concepts)
class BertAsConcept3(TextClassificationAbstract):
    """
    Label-attention classifier on top of frozen transformer representations, adapted from
    https://raw.githubusercontent.com/EMNLP2019LSAN/LSAN/master/attention/model.py
    """
    def __init__(self, classes, representation="roberta", label_freeze=True, max_len=300, **kwargs):
        super(BertAsConcept3, self).__init__(**kwargs)
        # Input representation
        assert is_transformer(representation), "This model only works with transformers"
        self.max_len = max_len
        self.n_layers = 2
        self.representation = representation
        self._init_input_representations()

        # Label embeddings and attention components (adapted from the LSAN reference)
        self.n_classes = len(classes)
        self.label_freeze = label_freeze
        self.d_a = 1024
        self.classes = classes
        # Label embeddings: pooled transformer output of the class names, kept frozen.
        self.labels = torch.nn.Parameter(self.embedding(self.transform(self.classes.keys()))[1])
        self.labels.requires_grad = False
        self.label_embedding_dim = self.labels.shape[-1]

        self.input_projection2 = torch.nn.Linear(self.label_embedding_dim, self.embedding_dim)
        self.metric = Bilinear(self.embedding_dim).to(self.device)
        self.output_projection = torch.nn.Linear(in_features=self.max_len, out_features=1)
        self.att = AttentionWeightedAggregation(in_features=self.embedding_dim, d_a=self.d_a)
        self.build()

    def forward(self, x, return_scores=False):
        # Token representations: concatenation of the last n_layers hidden states (frozen).
        with torch.no_grad():
            embeddings = torch.cat(self.embedding(x)[2][(-1 - self.n_layers):-1], -1)
        p2 = self.input_projection2(self.labels)
        # Per-token scores against every label embedding.
        label_scores = torch.matmul(embeddings, p2.t())
        # Aggregate the per-token label scores with self-attention weights.
        output, att = self.att(embeddings, label_scores, return_att=True)
        if return_scores:
            return output, label_scores, att
        return output

    def create_labels(self, classes):
        # Replace the label set and recompute the label embeddings (e.g. for zero-shot transfer).
        if hasattr(self, "labels"):
            del self.labels
        self.classes = classes
        self.labels = torch.nn.Parameter(self.embedding(self.transform(self.classes.keys()).to(self.device))[1])
        self.labels.requires_grad = False
        self.label_embedding_dim = self.labels.shape[-1]
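A hedged sketch of how BertAsConcept3 is intended to be used, mirroring the training script further below; the toy class dictionary and the device are illustrative, and the base-class constructor arguments are assumed from that script:

import mlmc
import torch

classes = {"Business": 0, "Sports": 1}                 # illustrative label set
model = mlmc.models.BertAsConcept3(classes=classes,
                                   representation="roberta",
                                   optimizer=torch.optim.Adam,
                                   loss=torch.nn.BCEWithLogitsLoss,
                                   device=torch.device("cpu"))
x = model.transform(["The market rallied today."])     # tokenize to ids
logits = model(x)                                      # shape: (1, len(classes))
# Zero-shot label switch: recompute label embeddings for a new label set.
model.create_labels({"Politics": 0, "Science": 1, "Health": 2})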
"""
https://raw.githubusercontent.com/EMNLP2019LSAN/LSAN/master/attention/model.py
"""
import torch
from .abstracts import TextClassificationAbstract
from ..representation import get, is_transformer
import re
from ..representation.labels import makemultilabels
from ..layers import Bilinear, AttentionWeightedAggregation
from ignite.metrics import Average
from tqdm import tqdm
from apex import amp
class BertAsConceptFineTuning(TextClassificationAbstract):
    """
    Same architecture as BertAsConcept3, but the transformer is fine-tuned
    (adapted from https://raw.githubusercontent.com/EMNLP2019LSAN/LSAN/master/attention/model.py).
    """
    def __init__(self, classes, representation="roberta", label_freeze=True, max_len=300, **kwargs):
        super(BertAsConceptFineTuning, self).__init__(**kwargs)
        # Input representation
        assert is_transformer(representation), "This model only works with transformers"
        self.max_len = max_len
        self.n_layers = 2
        self.representation = representation
        self._init_input_representations()

        # Label embeddings and attention components (adapted from the LSAN reference)
        self.n_classes = len(classes)
        self.label_freeze = label_freeze
        self.d_a = 1024
        self.classes = classes
        self.labels = torch.nn.Parameter(self.embedding(self.transform(self.classes.keys()))[1])
        self.labels.requires_grad = False
        self.label_embedding_dim = self.labels.shape[-1]

        self.input_projection2 = torch.nn.Linear(self.label_embedding_dim, self.embedding_dim)
        self.metric = Bilinear(self.embedding_dim)
        self.output_projection = torch.nn.Linear(in_features=self.max_len, out_features=1)
        self.att = AttentionWeightedAggregation(in_features=self.embedding_dim, d_a=self.d_a)
        self.build()

    def forward(self, x, return_scores=False):
        # No torch.no_grad() here: the transformer is fine-tuned with the rest of the model.
        embeddings = torch.cat(self.embedding(x)[2][(-1 - self.n_layers):-1], -1)
        p2 = self.input_projection2(self.labels)
        label_scores = torch.matmul(embeddings, p2.t())
        output, att = self.att(embeddings, label_scores, return_att=True)
        if return_scores:
            return output, label_scores, att
        return output

    def create_labels(self, classes):
        # Replace the label set and recompute the label embeddings.
        if hasattr(self, "labels"):
            del self.labels
        self.classes = classes
        self.labels = torch.nn.Parameter(self.embedding(self.transform(self.classes.keys()).to(self.device))[1])
        self.labels.requires_grad = False
        self.label_embedding_dim = self.labels.shape[-1]
    def build(self):
        # Instantiate loss and optimizer if classes (rather than instances) were passed.
        if isinstance(self.loss, type) and self.loss is not None:
            self.loss = self.loss().to(self.device)
        if isinstance(self.optimizer, type) and self.optimizer is not None:
            self.optimizer = self.optimizer(filter(lambda p: p.requires_grad, self.parameters()), **self.optimizer_params)
        self.to(self.device)

    def _init_input_representations(self):
        if not hasattr(self, "n_layers"):
            self.n_layers = 4
        self.embedding, self.tokenizer = get(self.representation, output_hidden_states=True)
        # Concatenating n_layers hidden states multiplies the effective embedding size
        # (e.g. an assumed hidden size of 768 with n_layers=2 gives embedding_dim=1536).
        self.embedding_dim = self.embedding(torch.LongTensor([[0]]))[0].shape[-1] * self.n_layers
        # for param in self.embedding.parameters(): param.requires_grad = True
    def fit(self, train, valid=None, epochs=1, batch_size=2, valid_batch_size=50, classes_subset=None):
        validation = []
        train_history = {"loss": []}
        reset_labels = 10
        self.labels_distance = []
        for e in range(epochs):
            losses = {"loss": str(0.)}
            average = Average()
            train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
            with tqdm(train_loader,
                      postfix=[losses], desc="Epoch %i/%i" % (e + 1, epochs)) as pbar:
                for i, b in enumerate(train_loader):
                    self.optimizer.zero_grad()
                    y = b["labels"].to(self.device)
                    y[y != 0] = 1
                    x = self.transform(b["text"]).to(self.device)
                    output = self(x)
                    if hasattr(self, "regularize"):
                        l = self.loss(output, torch._cast_Float(y)) + self.regularize()
                    else:
                        l = self.loss(output, torch._cast_Float(y))
                    # Mixed-precision backward pass (apex amp).
                    with amp.scale_loss(l, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                    self.optimizer.step()
                    average.update(l.item())
                    pbar.postfix[0]["loss"] = round(average.compute().item(), 2 * self.PRECISION_DIGITS)
                    pbar.update()
                    if i % reset_labels == 0:
                        # Recompute the label embeddings from the updated transformer
                        # and track how far they have drifted.
                        f = self.labels.detach().cpu()
                        with torch.no_grad():
                            self.create_labels(self.classes)
                        f2 = self.labels.detach().cpu()
                        self.labels_distance.append(((f - f2) ** 2).sum(-1))
                # torch.cuda.empty_cache()
                if valid is not None:
                    validation.append(self.evaluate_classes(classes_subset=classes_subset,
                                                            data=valid,
                                                            batch_size=valid_batch_size,
                                                            return_report=False,
                                                            return_roc=False))
                    pbar.postfix[0].update(validation[-1])
                    pbar.update()
            # torch.cuda.empty_cache()
            train_history["loss"].append(average.compute().item())
        return {"train": train_history, "valid": validation}
\ No newline at end of file
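Since fit() records self.labels_distance every reset_labels batches, the drift of the recomputed label embeddings during fine-tuning can be inspected after training; a hedged sketch (the variable names are illustrative):

# After training a BertAsConceptFineTuning instance "tc":
# history = tc.fit(train=data["train"], valid=data["valid"], epochs=1)
drift = torch.stack(tc.labels_distance)    # (checkpoints, n_classes): squared L2 distance per label
print(drift.mean(-1))                      # average drift of the label embeddings per checkpoint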
......@@ -21,5 +21,6 @@ except:
from .ConceptScores import ConceptScores, ConceptScoresCNN,ConceptScoresCNNAttention,KimCNN2Branch,ConceptProjection,ConceptScoresAttention, ConceptScoresRelevance, ConceptScoresRelevanceWithImportanceWeights
from .ConceptLSAN import ConceptLSAN
from .GloveAsConcept import GloveConcepts
from .BertAsConcept import BertAsConcept, BertAsConcept2
from .BertAsConcept import BertAsConcept, BertAsConcept2, BertAsConcept3
from .BertAsConcept2 import BertAsConceptFineTuning
......@@ -10,10 +10,10 @@ batch_size = 50
mode = "transformer"
representation = "roberta"
optimizer = torch.optim.Adam
optimizer_params = {"lr": 1e-5}#, "betas": (0.9, 0.99)}
optimizer_params = {"lr": 1e-4}#, "betas": (0.9, 0.99)}
loss = torch.nn.BCEWithLogitsLoss
dataset = "blurbgenrecollection"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
concept_graph = "random"
layers = 1
label_freeze = True
......@@ -28,14 +28,25 @@ data = mlmc.data.get_dataset(dataset,
valid_split=0.25,
target_dtype=torch._cast_Float)
data2 = mlmc.data.get_dataset("rcv1",
type=mlmc.data.MultiLabelDataset,
ensure_valid=False,
valid_split=0.25,
target_dtype=torch._cast_Float)
# Change topic descriptions to human-readable class names
data2["classes"] = {data2["topicmap"][k].capitalize(): v for k, v in data2["classes"].items()}
for key in ("train", "test"):
    data2[key].y = [[data2["topicmap"][l].capitalize() for l in labellist] for labellist in data2[key].y]
    data2[key].classes = data2["classes"]
tc = mlmc.models.BertAsConcept2(
tc = mlmc.models.BertAsConcept3(
classes=data["classes"],
label_freeze=label_freeze,
representation=representation,
optimizer=optimizer,
# optimizer_params=optimizer_params,
#optimizer_params=optimizer_params,
loss=loss,
device=device)
......@@ -44,16 +55,34 @@ if data["valid"] is None:
train_sample = mlmc.data.class_sampler(data["train"], classes=["Business"],samples_size=100)
test_sample = mlmc.data.class_sampler(data["train"], classes=["Business"],samples_size=100)
history=tc.fit(train=mlmc.data.sampler(data["train"], absolute=10000),
valid=mlmc.data.sampler(data["valid"], absolute=10000),
history=tc.fit(train=data["train"],
valid=data["valid"],
batch_size=batch_size,
valid_batch_size=batch_size,
epochs=50)
i=4
print(test_sample.x[i])
print(test_sample.y[i])
print(tc.additional_concepts(test_sample.x[i], 10))
epochs=10)
##################
#
# SWITCH LABELS
#
if data2["valid"] is None:
data2["valid"] = mlmc.data.sampler(data2["test"], absolute=3000)
tc.create_labels(data2["classes"])
print(tc.evaluate(data2["valid"]))
history=tc.fit(train=data2["train"],
valid=data2["valid"],
batch_size=batch_size,
valid_batch_size=batch_size,
epochs=1)
mlmc.save(tc,"bert_as_concept_0.pt", only_inference=False)
i=4
# print(test_sample.x[i])
# print(test_sample.y[i])
# print(tc.additional_concepts(test_sample.x[i], 10))
#
print("test")
......
import mlmc
import torch
import re
import numpy as np
from apex import amp
epochs = 30
batch_size = 24
mode = "transformer"
representation = "roberta"
optimizer = torch.optim.Adam
optimizer_params = {"lr": 1e-6}#, "betas": (0.9, 0.99)}
loss = torch.nn.BCEWithLogitsLoss
dataset = "blurbgenrecollection"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
concept_graph = "random"
layers = 1
label_freeze = True
description= "LSAN extension with Glove embeddings."
data = mlmc.data.get_dataset(dataset,
type=mlmc.data.MultiLabelDataset,
ensure_valid=False,
valid_split=0.25,
target_dtype=torch._cast_Float)
# data2 = mlmc.data.get_dataset("rcv1",
# type=mlmc.data.MultiLabelDataset,
# ensure_valid=False,
# valid_split=0.25,
# target_dtype=torch._cast_Float)
#
# # CHange topic descriptions
# data2["classes"]={data2["topicmap"][k].capitalize():v for k,v in data2["classes"].items()}
# for key in ("train","test"):
# data2[key].y = [[data2["topicmap"][l].capitalize() for l in labellist]for labellist in data2[key].y]
# data2[key].classes = data2["classes"]
tc = mlmc.models.BertAsConceptFineTuning(
classes=data["classes"],
label_freeze=label_freeze,
representation=representation,
optimizer=optimizer,
optimizer_params=optimizer_params,
loss=loss,
max_len=200,
device=device)
# Keep the amp-initialized optimizer on the model so that fit() uses it.
tc, tc.optimizer = amp.initialize(tc, tc.optimizer, opt_level="O1")
# tc.embedding.half()
if data["valid"] is None:
data["valid"] = mlmc.data.sampler(data["test"], absolute=50)
train_sample = mlmc.data.class_sampler(data["train"], classes=["Business"],samples_size=100)
test_sample = mlmc.data.class_sampler(data["train"], classes=["Business"],samples_size=100)
history=tc.fit(train=data["train"],
valid=data["valid"],
batch_size=32,
valid_batch_size=batch_size,
epochs=10)
##################
#
# SWITCH LABELS (requires the rcv1 "data2" loading above, which is currently commented out)
#
# if data2["valid"] is None:
#     data2["valid"] = mlmc.data.sampler(data2["test"], absolute=3000)
#
# tc.create_labels(data2["classes"])
# print(tc.evaluate(data2["valid"]))
#
# history=tc.fit(train=data2["train"],
#                valid=data2["valid"],
#                batch_size=batch_size,
#                valid_batch_size=batch_size,
#                epochs=1)
mlmc.save(tc,"bert_as_concept_0.pt", only_inference=False)
i=4
# print(test_sample.x[i])
# print(test_sample.y[i])
# print(tc.additional_concepts(test_sample.x[i], 10))
#
print("test")
history=tc.fit(train=train_sample,
valid=train_sample,
batch_size=batch_size,
valid_batch_size=batch_size,
epochs=100)