Commit b8a23e24 authored by Fabian Ziegner

Added scripts

parent d70e9a88
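# Script: load Sacred zero-shot results from MongoDB (via mdbh), rename the
# metric columns, compute run durations, and store the cleaned table as a pickle.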
import pandas as pd
import mdbh
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
db_name = ""
uri = mdbh.get_mongodb(".mongo.conf", db_name)
df = mdbh.get_dataframe(uri)
# df = pd.read_pickle("zeroshot_results.pkl")
df = df[df["id"] > 50]
df = df[["experiment.name", 'metrics.test_accuracy',
'metrics.test_multilabel_report_micro avg_f1-score', 'metrics.test_multilabel_report_micro avg_precision',
'metrics.test_multilabel_report_micro avg_recall', 'metrics.test_multilabel_report_macro avg_f1-score',
'metrics.test_multilabel_report_macro avg_precision', 'metrics.test_multilabel_report_macro avg_recall',
"metrics.test_p@1", "metrics.test_p@3", "metrics.test_p@5", "start_time", "stop_time", "config.batch_size",
"config.dataset", "config.representation", "config.threshold", "config.target", "config.formatted",
"config.method", "config.whole_dataset", "config.cut_sample", "config.dataset_size"]]
mapping = {
    "experiment.name": "name",
    "config.batch_size": "batch_size",
    "config.dataset": "dataset",
    "config.threshold": "threshold",
    "config.target": "target",
    "config.formatted": "formatted",
    "config.method": "method",
    "config.whole_dataset": "whole_dataset",
    "config.cut_sample": "cut_sample",
    "config.dataset_size": "dataset_size",
    "metrics.test_accuracy": "accuracy",
    "metrics.test_multilabel_report_micro avg_f1-score": "micro_f1",
    "metrics.test_multilabel_report_micro avg_precision": "micro_precision",
    "metrics.test_multilabel_report_micro avg_recall": "micro_recall",
    "metrics.test_multilabel_report_macro avg_f1-score": "macro_f1",
    "metrics.test_multilabel_report_macro avg_precision": "macro_precision",
    "metrics.test_multilabel_report_macro avg_recall": "macro_recall",
    "metrics.test_p@1": "precision@1",
    "metrics.test_p@3": "precision@3",
    "metrics.test_p@5": "precision@5",
    "config.representation": "representation"
}
df = df.rename(columns=mapping)
# Calculate the duration of each experiment
df["duration"] = df["stop_time"] - df["start_time"]
df = df.drop(columns=["stop_time", "start_time"])
df = df.sort_values("name")
# Reduce the Timedelta string ("D days hh:mm:ss.ffffff") to "hh:mm:ss"
df["duration"] = df["duration"].map(lambda x: str(x).split(" ")[-1][:8])
# Runs without an explicit cut_sample value did not cut the sample
df.loc[df["cut_sample"] != True, "cut_sample"] = False
df.to_pickle("./zeroshot_results_formatted.pkl")
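# Script: Sacred experiment for zero-shot text classification with Huggingface
# NLI models or Flair TARS, evaluated with mlmc metrics and logged to MongoDB.
# The connection variables below are placeholders.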
import flair
import mlmc
import torch
from flair.data import Sentence
from flair.models.text_classification_model import TARSClassifier
from sacred import Experiment, SETTINGS
from sacred.observers import MongoObserver
from sacred.utils import apply_backspaces_and_linefeeds
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
name = ""
user = ""
host = ""
database = ""
auth = ""
pw = ""
SETTINGS.CAPTURE_MODE = "sys"
ex = Experiment(name)
ex.observers.append(MongoObserver(url="localhost:27017", db_name=database))
ex.captured_out_filter = apply_backspaces_and_linefeeds
class ZeroshotClassification:
    """
    Stores the model information and provides helper methods. If the Huggingface
    method is used, call the attribute self.classifier; for Flair, call
    self.model.predict_zero_shot.
    """

    def __init__(self, classes, target, threshold, representation="facebook/bart-large-mnli", formatted=True, device=0):
        """
        :param classes: A dictionary mapping class labels to IDs.
        :param target: "single" if single-label, "multi" if multi-label.
        :param threshold: Score threshold to use. (see mlmc.thresholds.thresholds_dict.keys())
        :param representation: A huggingface model. (see https://huggingface.co/models)
        :param formatted: If True, each class label is replaced by a more descriptive label.
            Furthermore, if the huggingface method is used, the hypothesis is replaced as well.
        :param device: GPU to use.
        """
        if representation == "tars-base":
            self.model = TARSClassifier.load("tars-base")
        else:
            self.model = AutoModelForSequenceClassification.from_pretrained(representation)
            self.tokenizer = AutoTokenizer.from_pretrained(representation)
            self.classifier = pipeline("zero-shot-classification", model=self.model, tokenizer=self.tokenizer,
                                       device=device)
        self.config = {"representation": representation,
                       "classes": classes,
                       "threshold": threshold,
                       "target": target,
                       "format": formatted}

    def init_metrics(self, metrics="default_singlelabel"):
        """
        Initializes the metrics to be used. If no metrics are specified, the default metrics
        for the given target are used. (see mlmc.metrics.metrics_config.items())
        :param metrics: Name of the metrics (see mlmc.metrics.metrics_dict.keys() and mlmc.metrics.metrics_config.keys())
        :return: A dictionary containing the initialized metrics
        """
        metrics = mlmc.metrics.MetricsDict(metrics)
        metrics.init(self.config)
        metrics.reset()
        return metrics
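# Usage sketch (hypothetical labels and text, not part of the experiment):
# zc = ZeroshotClassification(classes={"sports": 0, "politics": 1}, target="single", threshold="max")
# result = zc.classifier("The match ended 2:1.", ["sports", "politics"],
#                        hypothesis_template="This example is {}.")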
@ex.config
def ex_config():
    """
    :param device: GPU to use.
    :param batch_size: Batch size.
    :param representation: A huggingface model. (see https://huggingface.co/models)
    :param dataset: Dataset to use. (see mlmc.data.register.keys())
    :param target: "single" if single-label, "multi" if multi-label.
    :param threshold: Score threshold to use. (see mlmc.thresholds.thresholds_dict.keys())
    :param formatted: If True, each class label is replaced by a more descriptive label.
        Furthermore, if the huggingface method is used, the hypothesis is replaced as well.
    :param cut_sample: Trims the input text to the maximum input size of the language model.
    :param method: "huggingface" or "flair"
    :param whole_dataset: If True, the entire dataset is used for classification.
    """
    device = 0
    batch_size = 1
    representation = "tals/albert-base-mnli"
    dataset = "trec6"
    target = "single"
    threshold = "max"
    # Multi-label targets use the mcut threshold and trimmed samples by default
    if target == "multi":
        threshold = "mcut"
    formatted = True
    cut_sample = False
    if target == "multi":
        cut_sample = True
    method = "huggingface"
    if method == "flair":
        flair.device = torch.device(f"cuda:{device}")
    whole_dataset = True
    if dataset == "rcv1":
        whole_dataset = False
        dataset_size = 10000
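# Usage sketch: besides the runner script further below, single runs can be
# started from the Sacred CLI, e.g. (assuming this file is zeroshot.py, as the
# runner's import suggests):
#   python zeroshot.py with dataset=agnews method=huggingface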
@ex.automain
def run(_run, dataset, batch_size, representation, threshold, target, formatted, method, cut_sample, device):
    """
    Sacred run method. Parameters are automatically retrieved from the configuration.
    """
    data = mlmc.data.get(dataset)
    hypothesis = "This example is {}."
    if formatted:
        # Replace the hypothesis with a more task-specific template
        hypothesis = mlmc.data.dataset_formatter.SFORMATTER[dataset]("{}")
        if dataset in ["trec6", "trec50", "dbpedia", "agnews", "yelpfull", "amazonfull"]:
            formatted_classes = {}
            for i, c in enumerate(data["classes"]):
                # Replace each class label with a more descriptive one
                formatted_class = mlmc.data.dataset_formatter.label_dicts[dataset].get(c, c)
                formatted_classes[formatted_class] = i
            data["classes"] = formatted_classes
    classes_dict = data["classes"]
    classes_list = list(data["classes"].keys())
    # Cut rcv1 to a shorter size, as classifying the full test set would take too long
    if dataset == "rcv1":
        data["test"] = mlmc.data.sampler(data["test"], absolute=10000)
    test_dataloader = DataLoader(data["test"], batch_size=batch_size, shuffle=False)
    zc = ZeroshotClassification(representation=representation, classes=data["classes"], threshold=threshold,
                                target=target, formatted=formatted, device=device)
    if target == "multi":
        initialized_metrics = zc.init_metrics(metrics="default_multilabel")
        multi_class = True
    else:
        initialized_metrics = zc.init_metrics()
        multi_class = False
    threshold_ = mlmc.thresholds.get(threshold)
    for sample in tqdm(test_dataloader):
        results, truth_l, pred_l = [], [], []
        if method == "huggingface":
            if cut_sample:
                # Trim the input to the model's maximum input size (character-based heuristic)
                with torch.no_grad():
                    results.append(zc.classifier(sample["text"][0][:zc.model.config.max_position_embeddings],
                                                 classes_list, multi_class=multi_class,
                                                 hypothesis_template=hypothesis))
            else:
                with torch.no_grad():
                    results.append(zc.classifier(sample["text"][0], classes_list, multi_class=multi_class,
                                                 hypothesis_template=hypothesis))
        elif method == "flair":
            sentence = Sentence(sample["text"][0])
            with torch.no_grad():
                zc.model.predict_zero_shot(sentence, classes_list, multi_label=multi_class)
            results_dict = {"labels": [], "scores": []}
            r = [sentence.get_labels()]
            for result in r:
                for s in result:
                    # Clean the output to get the correct input format for threshold application,
                    # e.g. "sports (0.98)" -> label "sports", score 0.98
                    results_dict["labels"].append(str(s).split(" (")[0])
                    results_dict["scores"].append(float(str(s).split(" (")[1].split(")")[0]))
            results.append(results_dict)
        # Order the score tuple of each prediction according to the order of the class dictionary
        for results_dict in results:
            scores_list = [x for _, x in sorted(zip(results_dict["labels"], results_dict["scores"]),
                                                key=lambda y: classes_dict.get(y[0]))]
            scores = torch.tensor([scores_list])
            truth_l.append(torch.squeeze(sample["labels"]))
            pred_l.append(torch.squeeze(threshold_(scores)))
        initialized_metrics.update_metrics((scores, torch.stack(truth_l), torch.stack(pred_l)))
    initialized_metrics.compute()
    initialized_metrics.log_sacred(_run, 1, "test")
    metrics = initialized_metrics.print()
    print(metrics)
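# Runner script: launches the Sacred experiment above once per dataset, first
# for the single-label and then for the multi-label datasets, using Flair TARS.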
from zeroshot import ex

device = "0"
representation = "tars-base"
method = "flair"
target = "single"
threshold = "max"
datasets = ["agnews", "dbpedia", "trec6", "trec50", "yahoo_answers", "amazonfull", "yelpfull"]
for dataset in datasets:
    experiment = ex.run(options={'--name': f'{dataset}-TARS_base'},
                        config_updates={'representation': representation, 'target': target, 'dataset': dataset,
                                        'threshold': threshold, 'device': device, 'method': method})

target = "multi"
threshold = "mcut"
datasets = ["blurbgenrecollection", "rcv1"]
for dataset in datasets:
    experiment = ex.run(options={'--name': f'{dataset}-TARS_base'},
                        config_updates={'representation': representation, 'target': target, 'dataset': dataset,
                                        'threshold': threshold, 'device': device, 'method': method})
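# Script: Sacred experiment for zero-shot classification that casts each sample
# as a multiple-choice question for a UnifiedQA T5 model.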
from string import ascii_uppercase
import mlmc
import torch
from sacred import Experiment, SETTINGS
from sacred.observers import MongoObserver
from sacred.utils import apply_backspaces_and_linefeeds
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoTokenizer, T5ForConditionalGeneration
name = ""
user = ""
host = ""
database = ""
auth = ""
pw = ""
SETTINGS.CAPTURE_MODE = "sys"
ex = Experiment(name)
ex.observers.append(MongoObserver(url="localhost:27017", db_name=database))
ex.captured_out_filter = apply_backspaces_and_linefeeds
class ZeroshotClassification:
    def __init__(self, representation, classes, target, format_):
        self.model = T5ForConditionalGeneration.from_pretrained(representation)
        self.tokenizer = AutoTokenizer.from_pretrained(representation)
        self.config = {"representation": representation,
                       "classes": classes,
                       "target": target,
                       "format": format_}

    def init_metrics(self, metrics="default_singlelabel"):
        """
        Initializes the metrics to be used. If no metrics are specified, the default metrics
        for the given target are used. (see mlmc.metrics.metrics_config.items())
        :param metrics: Name of the metrics (see mlmc.metrics.metrics_dict.keys() and mlmc.metrics.metrics_config.keys())
        :return: A dictionary containing the initialized metrics
        """
        metrics = mlmc.metrics.MetricsDict(metrics)
        metrics.init(self.config)
        metrics.reset()
        return metrics

    def run_model(self, input_string, **generator_args):
        # Encode the multiple-choice prompt, generate an answer, and decode it back to text
        input_ids = self.tokenizer.encode(input_string, return_tensors="pt")
        res = self.model.generate(input_ids, **generator_args)
        return self.tokenizer.batch_decode(res, skip_special_tokens=True)
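# Usage sketch (illustrative values): UnifiedQA expects question, answer choices,
# and context joined by literal "\n" separators, e.g.
#   zc.run_model("What is this question about? \\n (A) sports (B) politics \\n The match ended 2:1.")
# might return something like ["sports"].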
@ex.config
def ex_config():
    device = 0
    batch_size = 1
    representation = "tals/albert-base-mnli"
    dataset = "agnews"
    target = "single"
    threshold = "max"
    if target == "multi":
        threshold = "mcut"
    formatted = True
    cut_sample = False
    if target == "multi":
        cut_sample = True
    method = "huggingface"
    whole_dataset = True
    if dataset == "rcv1":
        whole_dataset = False
        dataset_size = 10000
@ex.automain
def run(_run, dataset, formatted):
    data = mlmc.data.get(dataset)
    if formatted:
        if dataset in ["trec6", "trec50", "dbpedia", "agnews", "yelpfull", "amazonfull"]:
            formatted_classes = {}
            for i, c in enumerate(data["classes"]):
                # Replace each class label with a more descriptive one
                formatted_class = mlmc.data.dataset_formatter.label_dicts[dataset].get(c, c)
                formatted_classes[formatted_class] = i
            data["classes"] = formatted_classes
    classes = data["classes"]
    # Cut rcv1 to a shorter size, as classifying the full test set would take too long
    if dataset == "rcv1":
        data["test"] = mlmc.data.sampler(data["test"], absolute=10000)
    test_dataloader = DataLoader(data["test"], batch_size=1, shuffle=False)
    zc = ZeroshotClassification("allenai/unifiedqa-t5-small", classes=classes, target="single", format_=formatted)
    initialized_metrics = zc.init_metrics()
    threshold_ = mlmc.thresholds.get("max")
    question = "What is this question about?"
    choices = ""
    # Two-letter choice labels (AA, AB, ...) for datasets with more than 26 classes:
    """
    class_counter = 0
    for char1 in ascii_uppercase:
        for char2, class_ in zip(ascii_uppercase, classes.keys()):
            if class_counter < len(classes.keys()):
                choices += "(" + char1 + char2 + ") " + class_ + " "
                class_counter += 1
    """
    # Build the answer choices, e.g. "(A) sports (B) politics ..."
    for char1, class_ in zip(ascii_uppercase, classes.keys()):
        choices += "(" + char1 + ") " + class_ + " "
    for sample in tqdm(test_dataloader):
        truth_l, pred_l = [], []
        # Collapse whitespace and newlines in the input text
        text = " ".join(sample["text"][0].replace("\n", "").split())
        # UnifiedQA input format: question \n choices \n context (literal "\n" separators)
        encoded_input = question + " \\n " + choices + "\\n " + text
        # num_return_sequences = 5
        # output = zc.run_model(encoded_input, num_beams=20, num_return_sequences=num_return_sequences, do_sample=True)
        output = zc.run_model(encoded_input)
        predicted_class = None
        for class_ in output:
            if class_ in classes:
                predicted_class = class_
                break
        # One-hot score vector for the predicted class (all zeros if nothing matched)
        scores_list = [1 if predicted_class == class_ else 0 for class_ in classes]
        scores = torch.tensor([scores_list])
        truth_l.append(torch.squeeze(sample["labels"]))
        pred_l.append(torch.squeeze(threshold_(scores)))
        initialized_metrics.update_metrics((scores, torch.stack(truth_l), torch.stack(pred_l)))
    initialized_metrics.compute()
    initialized_metrics.log_sacred(_run, 1, "test")
    metrics = initialized_metrics.print()
    print(metrics)