asv-ml / mlmc

Commit 4bc115fc authored Jan 27, 2021 by Janos Borst

Merge branch '10-documentation' into 'dev'

Resolve "Documentation"

See merge request !21

Parents: ee44e3c6, ed7405c0
Pipeline #45361 failed in 16 minutes and 8 seconds
Changes: 35, Pipelines: 1
mlmc/data/__init__.py
...
...
@@ -83,9 +83,21 @@ class MultiLabelDataset(Dataset):
        self.target_dtype = target_dtype

    def __len__(self):
        """
        Returns the length of the dataset. The length is determined by the size
        of the list containing the input text.

        :return: Length of the dataset
        """
        return len(self.x)

    def __getitem__(self, idx):
        """
        Retrieves a single entry from the dataset.

        :param idx: Index of the entry
        :return: Dictionary containing the text and labels of the entry
        """
        if self.one_hot:
            labels = [self.classes[tag] for tag in self.y[idx]]
            labels = torch.nn.functional.one_hot(torch.LongTensor(labels), len(self.classes)).sum(0)
...
...
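The one-hot step above can be tried in isolation; a minimal sketch with a made-up class map (not part of the diff):

```
import torch

classes = {"science": 0, "sports": 1, "politics": 2}
tags = ["science", "politics"]                      # one entry's label set
labels = [classes[tag] for tag in tags]             # -> [0, 2]
labels = torch.nn.functional.one_hot(
    torch.LongTensor(labels), len(classes)).sum(0)  # -> tensor([1, 0, 1])
```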
@@ -125,6 +137,12 @@ class MultiLabelDataset(Dataset):
        return {"x": self.x, "y": self.y, "classes": list(self.classes.keys())}

    def __add__(self, o):
        """
        Merges dataset with another dataset.

        :param o: Another dataset
        :return: MultiLabelDataset containing x, y and classes of both datasets
        """
        new_classes = list(set(list(self.classes.keys()) + list(o.classes.keys())))
        new_classes.sort()
        new_classes = dict(zip(new_classes, range(len(new_classes))))
...
...
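The class-map merge above reduces to a few list and dict operations; a small sketch with made-up label maps:

```
a = {"politics": 0, "science": 1}
b = {"science": 0, "sports": 1}
new_classes = list(set(list(a.keys()) + list(b.keys())))
new_classes.sort()
new_classes = dict(zip(new_classes, range(len(new_classes))))
# -> {'politics': 0, 'science': 1, 'sports': 2}
```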
@@ -236,16 +254,77 @@ class MultiLabelDataset(Dataset):
class SingleLabelDataset(MultiLabelDataset):
    def __init__(self, *args, **kwargs):
        """
        Class constructor. Creates an instance of SingleLabelDataset.

        :param classes: A class mapping from label strings to successive indices
        :param x: A list of the input text
        :param y: A list of corresponding label sets
        :param target_dtype: The final cast on the label output. Some of torch's loss functions expect other
            data types; this argument defines a function that is applied to the final output of the label
            tensors. (default: torch._cast_Float)
        :param kwargs: Any additional information that is given by named keywords will be saved as metadata

        Example:
            ```
            x = ["This is a text about science",
                 "This is another text about philosophy"]
            y = [['science'],
                 ['philosophy']]
            classes = {
                "science": 0,
                "philosophy": 1,
            }
            dataset = mlmc.data.SingleLabelDataset(x=x, y=y, classes=classes)
            dataset[0]
            ```
        """
        super(SingleLabelDataset, self).__init__(*args, **kwargs)
        assert all([len(x) == 1 for x in self.y]), \
            "This is not a single label dataset. Some entries contain multiple labels."

    def __getitem__(self, idx):
        """
        Retrieves a single entry from the dataset.

        :param idx: Index of the entry
        :return: Dictionary containing the text and labels of the entry
        """
        return {'text': self.x[idx], 'labels': torch.tensor(self.classes[self.y[idx][0]])}
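Given the docstring example above, indexing should yield something like the following (a sketch of the expected shape, not output captured from a run):

```
dataset[0]
# -> {'text': 'This is a text about science', 'labels': tensor(0)}
```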
class MultiOutputMultiLabelDataset(Dataset):
    def __init__(self, classes, x, y, target_dtype=torch._cast_Float, **kwargs):
        """
        Class constructor. Creates an instance of MultiOutputMultiLabelDataset.

        :param classes: A class mapping from label strings to successive indices
        :param x: A list of the input text
        :param y: A list of corresponding label sets
        :param target_dtype: The final cast on the label output. Some of torch's loss functions expect other
            data types; this argument defines a function that is applied to the final output of the label
            tensors. (default: torch._cast_Float)
        :param kwargs: Any additional information that is given by named keywords will be saved as metadata

        Example:
            ```
            x = ["Text sample 1", "Text sample 2"]
            y = [[["label0", "label1"], ["label2"]],
                 [["label1"], ["label1", "label2"]]]
            classes = [{
                "label0": 0,
                "label1": 1
            }, {
                "label1": 0,
                "label2": 1
            }]
            dataset = mlmc.data.MultiOutputMultiLabelDataset(x=x, y=y, classes=classes)
            dataset[0]
            ```
        """
        super(MultiOutputMultiLabelDataset, self).__init__(**kwargs)
        if isinstance(classes, dict):
            self.classes = [classes.copy() for _ in range(len(y[0]))]
...
...
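The isinstance branch above broadcasts a single class map to every output; sketched with made-up data:

```
classes = {"label0": 0, "label1": 1}
y = [[["label0"], ["label0", "label1"]]]                # one sample, two outputs
per_output = [classes.copy() for _ in range(len(y[0]))]
# -> two independent dicts, one per output head
```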
@@ -263,9 +342,21 @@ class MultiOutputMultiLabelDataset(Dataset):
        self.y = y

    def __len__(self):
        """
        Returns the length of the dataset. The length is determined by the size
        of the list containing the input text.

        :return: Length of the dataset
        """
        return len(self.x)

    def __getitem__(self, item):
        """
        Retrieves a single entry from the dataset.

        :param item: Index of the entry
        :return: Dictionary containing the text and labels of the entry
        """
        result = {"text": self.x[item]}
        label_one_hot = [torch.stack([torch.nn.functional.one_hot(torch.tensor(x[label]), len(x))
                                      for label in labelset], 0)
                         for
...
...
@@ -273,9 +364,33 @@ class MultiOutputMultiLabelDataset(Dataset):
        result.update({f"labels_{i}": v.sum(0) for i, v in enumerate(label_one_hot)})
        return result
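The update above keys each summed one-hot stack as labels_0, labels_1, and so on; a tiny sketch with dummy tensors:

```
import torch

label_one_hot = [torch.ones(2, 3), torch.ones(2, 2)]   # dummy per-output stacks
result = {"text": "Text sample 1"}
result.update({f"labels_{i}": v.sum(0) for i, v in enumerate(label_one_hot)})
# -> keys: 'text', 'labels_0', 'labels_1'
```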
class MultiOutputSingleLabelDataset(Dataset):
    def __init__(self, classes, x, y=None, **kwargs):
        """
        Class constructor. Creates an instance of MultiOutputSingleLabelDataset.

        :param classes: A class mapping from label strings to successive indices
        :param x: A list of the input text
        :param y: A list of corresponding label sets
        :param kwargs: Any additional information that is given by named keywords will be saved as metadata

        Example:
            ```
            x = ["Text sample 1", "Text sample 2"]
            y = [[["label0"], ["label2"]],
                 [["label1"], ["label2"]]]
            classes = [{
                "label0": 0,
                "label1": 1
            }, {
                "label2": 0
            }]
            dataset = mlmc.data.MultiOutputSingleLabelDataset(x=x, y=y, classes=classes)
            dataset[0]
            ```
        """
        super(MultiOutputSingleLabelDataset, self).__init__(**kwargs)
        if y is not None:
            if isinstance(classes, dict):
...
...
@@ -293,15 +408,36 @@ class MultiOutputSingleLabelDataset(Dataset):
        self.y = y

    def __getitem__(self, item):
        """
        Retrieves a single entry from the dataset.

        :param item: Index of the entry
        :return: Dictionary containing the text and labels of the entry
        """
        if self.y is None:
            return {'text': self.x[item]}
        else:
            return {'text': self.x[item],
                    'labels': torch.tensor([d[y[0]] for d, y in zip(self.classes, self.y[item])])}

    def __len__(self):
        """
        Returns the length of the dataset. The length is determined by the size
        of the list containing the input text.

        :return: Length of the dataset
        """
        return len(self.x)

    def reduce(self, subset):
        """
        Reduces the dataset to a subset of the classes.
        The resulting dataset will only contain instances with at least one label that appears in the subset argument.
        The subset can also provide a new mapping from the new label names to indices (dict).
        All labels not in subset will be removed. Instances with an empty label set will be removed.

        :param subset: A mapping of classes to indices
        """
        assert len(subset) == len(self.classes), "Subset and existing classes have varying output sizes"
        assert all([all([x in c.keys() for x in s.keys()]) for s, c in zip(subset, self.classes)]), \
            "Subset contains classes not present in dataset"
...
...
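For intuition, the per-output label lookup in __getitem__ above, replayed with the class docstring's example data:

```
import torch

classes = [{"label0": 0, "label1": 1}, {"label2": 0}]
y_item = [["label0"], ["label2"]]       # one entry, one label per output
torch.tensor([d[y[0]] for d, y in zip(classes, y_item)])
# -> tensor([0, 0])
```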
@@ -312,6 +448,12 @@ class MultiOutputSingleLabelDataset(Dataset):
        self.classes = subset

    def __add__(self, o):
        """
        Merges dataset with another dataset.

        :param o: Another dataset
        :return: MultiOutputSingleLabelDataset containing x, y and classes of both datasets
        """
        new_classes = [list(set(list(c1.keys()) + list(c2.keys()))) for c1, c2 in zip(self.classes, o.classes)]
        new_classes = [dict(zip(c, range(len(c)))) for c in new_classes]
...
...
@@ -404,6 +546,12 @@ def get_singlelabel_dataset(name):
    return get_dataset(name, type=SingleLabelDataset, ensure_valid=False, target_dtype=torch._cast_Float)


def get(name):
    """
    Universal get function for datasets.

    :param name: Name of the dataset
    :return: A dataset if the name exists
    """
    try:
        try:
            return get_singlelabel_dataset(name)
...
...
@@ -414,6 +562,12 @@ def get(name):
        print(register.keys())


def is_multilabel(x):
    """
    Checks if input is a multilabel dataset.

    :param x: A dataset
    :return: True if multilabel, else False.
    """
    return type(x) in (MultiLabelDataset, MultiOutputMultiLabelDataset)
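A usage sketch; it assumes MultiLabelDataset accepts x, y and classes as keyword arguments, as the subclass docstrings above suggest:

```
ds = MultiLabelDataset(x=["some text"], y=[["science"]], classes={"science": 0})
is_multilabel(ds)   # -> True
```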
## Sampler import
...
...
mlmc/data/data_loaders.py
...
...
@@ -22,6 +22,12 @@ CACHE = Path.home() / ".mlmc" / "datasets"
URL = "https://aspra29.informatik.uni-leipzig.de:9090/"


def _load_from_tmp(dataset):
    """
    Loads a dataset from cache.

    :param dataset: Name of the dataset
    :return: Tuple of form (data, classes) if dataset exists in cache, else None
    """
    if not Path.exists(CACHE):
        Path.mkdir(CACHE)
    if Path.is_file(CACHE / dataset):
...
...
@@ -33,6 +39,13 @@ def _load_from_tmp(dataset):
    return None


def _save_to_tmp(dataset, data):
    """
    Saves a dataset to cache.

    :param dataset: Name of the dataset
    :param data: Tuple of form (data, classes)
    :return: Path to the saved dataset if the dataset didn't exist in cache, else False.
    """
    if not Path.exists(CACHE):
        Path.mkdir(CACHE)
    if not Path.is_file(CACHE / dataset):
...
...
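All loaders below share the same cache round-trip, visible in load_aapd; schematically:

```
data = _load_from_tmp("aapd")      # None on a cold cache
if data is None:
    data = ...                     # build the (data, classes) tuple from the raw source
    _save_to_tmp("aapd", data)
```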
@@ -48,6 +61,12 @@ def _save_to_tmp(dataset, data):
def load_aapd():
    """
    Loads AAPD (Arxiv Academic Paper Dataset) from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Multilabel Classification

    :return: Tuple of form (data, classes)
    """
    data = _load_from_tmp("aapd")
    if data is not None:
        return data
...
...
@@ -76,6 +95,14 @@ def load_aapd():
def load_rcv1(path=None):
    """
    Loads RCV1 (Reuters Corpus Volume I) from cache. This dataset can't be downloaded automatically, so a path to the
    archive has to be provided on the first call.
    Task: Multilabel Classification

    :param path: Path to the unprocessed corpus
    :return: Tuple of form (data, classes)
    """
    data = _load_from_tmp("rcv1")
    if data is not None:
        return data
...
...
@@ -163,6 +190,14 @@ def load_rcv1(path=None):
def load_wiki30k(path="/disk1/users/jborst/Data/Test/MultiLabel/wiki30k"):
    """
    Loads wiki30k from cache. This dataset can't be downloaded automatically, so a path to the archive has to be
    provided on the first call.
    Task: Multilabel Classification

    :param path: Path to the pickled dataset
    :return: Tuple of form (data, classes)
    """
    import pickle
    with open(Path(path) / "wiki30k_raw_text.p", "rb") as f:
        content = pickle.load(f)
...
...
@@ -175,6 +210,12 @@ def load_wiki30k(path="/disk1/users/jborst/Data/Test/MultiLabel/wiki30k"):
def load_eurlex():
    """
    Loads EUR-Lex from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Multilabel Classification

    :return: Tuple of form (data, classes)
    """
    data = _load_from_tmp("eurlex")
    if data is not None:
        return data
...
...
@@ -206,6 +247,13 @@ def load_eurlex():
def load_huffpost(test_split=0.25):
    """
    Loads the Huffington Post dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Multilabel Classification

    :param test_split: Size of the test split as a fraction of 1
    :return: Tuple of form (data, classes)
    """
    data = _load_from_tmp("huffpost")
    if data is not None:
        return data
...
...
@@ -237,6 +285,13 @@ def load_huffpost(test_split=0.25):
def load_moviesummaries(test_split=0.25):
    """
    Loads the Movie Summaries dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Multilabel Classification

    :param test_split: Size of the test split as a fraction of 1
    :return: Tuple of form (data, classes)
    """
    data = _load_from_tmp("moviesummaries")
    if data is not None:
        return data
...
...
@@ -285,6 +340,12 @@ def load_moviesummaries(test_split=0.25):
# ----------------------------------------------
def load_blurbgenrecollection():
    """
    Loads the Blurb Genre Collection dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Multilabel Classification

    :return: Tuple of form (data, classes)
    """
    url = "https://fiona.uni-hamburg.de/ca89b3cf/blurbgenrecollectionen.zip"
    data = _load_from_tmp("blurbgenrecollection")
    if data is not None:
...
...
@@ -319,6 +380,13 @@ def load_blurbgenrecollection():
def load_blurbgenrecollection_de():
    """
    Loads the German version of the Blurb Genre Collection dataset from cache. If it doesn't exist in cache the
    dataset will be downloaded.
    Task: Multilabel Classification

    :return: Tuple of form (data, classes)
    """
    url = "https://www.inf.uni-hamburg.de/en/inst/ab/lt/resources/data/germeval-2019-hmc/germeval2019t1-public-data-final.zip"
    data = _load_from_tmp("blurbgenrecollection_de")
    if data is not None:
...
...
@@ -369,6 +437,12 @@ def load_webofscience():
# zipfile = ZipFile(BytesIO(resp.read()))


def load_20newsgroup():
    """
    Loads the 20 Newsgroups dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Singlelabel Classification

    :return: Tuple of form (data, classes)
    """
    url = "http://qwone.com/~jason/20Newsgroups/20news-bydate.tar.gz"
    data = _load_from_tmp("20newsgroup")
    if data is not None:
...
...
@@ -419,6 +493,12 @@ def load_20newsgroup():
    return data, classes


def load_agnews():
    """
    Loads the AG News dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Singlelabel Classification

    :return: Tuple of form (data, classes)
    """
    url = "https://s3.amazonaws.com/fast-ai-nlp/ag_news_csv.tgz"
    data = _load_from_tmp("agnews")
    if data is not None:
...
...
@@ -460,6 +540,12 @@ def load_agnews():
    return data, classes


def load_dbpedia():
    """
    Loads the DBpedia dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Singlelabel Classification

    :return: Tuple of form (data, classes)
    """
    url = "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz"
    data = _load_from_tmp("dbpedia")
    if data is not None:
...
...
@@ -501,6 +587,12 @@ def load_dbpedia():
    return data, classes


def load_ohsumed():
    """
    Loads the Ohsumed dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Multilabel Classification

    :return: Tuple of form (data, classes)
    """
    url = "http://disi.unitn.eu/moschitti/corpora/ohsumed-first-20000-docs.tar.gz"
    url_classes = "http://disi.unitn.eu/moschitti/corpora/First-Level-Categories-of-Cardiovascular-Disease.txt"
    data = _load_from_tmp("ohsumed")
...
...
@@ -589,6 +681,13 @@ def load_ohsumed():
    return data, classes


def export(data, classes, path=Path("./export")):
    """
    Exports the data and class dictionaries of a dataset to text files.

    :param data: Data dictionary containing training, validation and test splits
    :param classes: Class dictionary mapping labels to indices
    :param path: Path to write the text files to
    """
    path = Path(path)
    if not path.exists():
        path.mkdir()
...
...
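Usage sketch for export above, assuming data and classes come from one of the loaders in this file (load_agnews, for instance, returns the (data, classes) tuple):

```
data, classes = load_agnews()
export(data, classes, path=Path("./export"))
```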
@@ -607,6 +706,12 @@ def export(data, classes, path=Path("./export")):
        o.writelines([x + "\n" for x in classes.keys()])


def load_yahoo_answers():
    """
    Loads the Yahoo Answers dataset from cache. If it doesn't exist in cache the dataset will be downloaded.
    Task: Singlelabel Classification

    :return: Tuple of form (data, classes)
    """
    url = (URL + "/yahoo_answers_csv.tar.gz").replace("https", "http")
    data = _load_from_tmp("yahoo_answers")
    if data is not None:
...
...
mlmc/data/transformer.py
...
...
@@ -5,8 +5,8 @@ import torch
def clean(x):
    """
    Remove every character in a string that is not ascii, punctuation or whitespace

    :param x: String
    :return: Cleaned string
    """
    import string
    return "".join([c for c in x if c in string.ascii_letters + string.punctuation + " "])
...
...
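A quick example of the behavior (non-ASCII characters are simply dropped, not transliterated):

```
clean("Héllo, wörld!")
# -> 'Hllo, wrld!'
```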
mlmc/graph/__init__.py
"""
Provides functions for loading predefined graphs
"""
from
.helpers
import
cooc_matrix
from
.embeddings
import
get_nmf
,
get_node2vec
,
get_random_projection
from
.graph_loaders
import
load_wordnet
,
load_wordnet_sample
,
load_NELL
,
load_elsevier
,
load_conceptNet
,
load_stw
,
load_nasa
,
\
...
...
@@ -18,6 +21,12 @@ register = {
def get_graph(name: str):
    """
    Loads a graph.

    :param name: Name of the graph (see register.keys())
    :return: Function call of the chosen graph
    """
    fct = register.get(name)
    if fct is None:
        raise FileNotFoundError
...
...
@@ -26,8 +35,12 @@ def get_graph(name: str):
import networkx as nx


def get(name: [list, str]):
    """
    Loads a graph. If multiple names are provided the union of the graphs is returned.

    :param name: Name(s) of the graph(s) to compose (see register.keys())
    :return: Merged graph
    """
    if isinstance(name, str):
        name = [name]
    return nx.compose_all([get_graph(x) for x in name])
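Usage sketch; the graph names here are guesses based on the loaders imported above, so check register.keys() for the actual keys:

```
g = get(["wordnet", "conceptNet"])   # union of both graphs via nx.compose_all
```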
mlmc/graph/embeddings.py
...
...
@@ -7,6 +7,14 @@ from sklearn.decomposition import NMF
def subgraph_extract(X, graph, subnodelist):
    """
    Extracts a subset of node embeddings from a graph.

    :param X: Node embeddings of graph
    :param graph: A networkx graph
    :param subnodelist: Dictionary of nodes for which the embedding will be returned
    :return: Embeddings of all nodes in subnodelist
    """
    new = np.zeros_like(X)
    for i, nm in enumerate(graph.nodes):
        if nm in subnodelist.keys():
...
...
mlmc/graph/graph_insert.py
...
...
@@ -6,9 +6,25 @@ import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer


def ngrams(x, k):
    """
    Splits text into n-grams.

    :param x: A string
    :param k: Size of each n-gram
    :return: List of n-grams
    """
    return [x[i:(i + k)] for i in range(len(x) - k + 1)]


def _mh(x, k, num_perm, wk):
    """
    Calculates MinHash of a string for estimating Jaccard similarity using shingling. Multiple shingling sizes may be
    specified.

    :param x: A string
    :param k: Shingling size(s)
    :param num_perm: Number of permutation functions
    :return: MinHash
    """
    from datasketch import MinHash, MinHashLSH
    x = x.upper()
    k = k if isinstance(k, (tuple, list)) else [k]
...
...
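For example, ngrams produces overlapping character shingles, which is what feeds the MinHash; the MinHash lines below assume the standard datasketch API:

```
from datasketch import MinHash

ngrams("graph", 3)            # -> ['gra', 'rap', 'aph']

m = MinHash(num_perm=48)
for s in ngrams("GRAPH", 3):
    m.update(s.encode("utf8"))
```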
@@ -22,6 +38,16 @@ def _mh(x, k, num_perm, wk):
    return m1


def _subwordsplits_mh(x, k, num_perm, wk):
    """
    Calculates MinHash of a string for estimating Jaccard similarity using shingling. The initial shingles are further
    split into substrings. Multiple shingling sizes may be specified.

    :param x: A string
    :param k: Shingling size(s)
    :param num_perm: Number of permutation functions
    :param wk: Shingling size(s) of the substrings
    :return: MinHash
    """
    x = x.upper()
    from datasketch import MinHash, MinHashLSH
    k = k if isinstance(k, (tuple, list)) else [k]
...
...
@@ -35,6 +61,19 @@ def _subwordsplits_mh(x, k,num_perm, wk):
    return m1


def edges(l1, l2, _mh, num_perm=48, n=(2, 3), threshold=0.65, wk=(1, 2, 3)):
    """
    Compares two lists using Jaccard similarity and shingling. Multiple shingling sizes may be specified.

    :param l1: List of nodes
    :param l2: List of nodes
    :param _mh: MinHash function (_mh or _subwordsplits_mh)
    :param num_perm: Number of permutation functions
    :param n: Shingling size(s)
    :param threshold: Jaccard similarity threshold
    :param wk: Shingling size(s) of the substrings
    :return: Dictionary containing all objects of l1 as keys and the corresponding objects of l2 above the specified
        threshold as values in list form.
    """
    # helper for
    from datasketch import MinHash, MinHashLSH
...
...
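The body of edges is elided above; one standard way to realize the matching its docstring describes is sketched below with datasketch (the names l1_hashes and l2_hashes are illustrative, not from the diff):

```
from datasketch import MinHashLSH

lsh = MinHashLSH(threshold=0.65, num_perm=48)
for i, mh in enumerate(l2_hashes):                 # one MinHash per l2 node
    lsh.insert(str(i), mh)
matches = {a: [l2[int(j)] for j in lsh.query(mh_a)]
           for a, mh_a in zip(l1, l1_hashes)}      # one MinHash per l1 node
```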
@@ -48,6 +87,7 @@ def edges(l1, l2,_mh, num_perm=48, n=(2, 3), threshold=0.65, wk=(1,2,3)):
def graph_insert_labels(data, kb, explanations):
    # TODO: Documentation
    l1 = list(data["classes"].keys())
    l2 = list(kb.nodes())
...
...
mlmc/graph/graph_loaders.py
...
...
@@ -14,6 +14,14 @@ from ..data.data_loaders import _save_to_tmp, _load_from_tmp
def transform(x, rg, lang="en"):
    """
    Transforms rdflib terms into plain text.

    :param x: A term (see rdflib.term)
    :param rg: An RDF Graph
    :param lang: Language of the wikidata article
    :return: String representation of term
    """
    if isinstance(x, rdflib.term.URIRef):
        from rdflib.namespace import SKOS
        if str(SKOS) in str(x):
            return [x.split("#")[-1]]
...
...
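For instance, a SKOS URIRef is reduced to its fragment (illustrative term; rdflib's URIRef subclasses str, so split works on it directly):

```
import rdflib

term = rdflib.term.URIRef("http://www.w3.org/2004/02/skos/core#broader")
term.split("#")[-1]   # -> 'broader'
```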
@@ -30,12 +38,25 @@ def transform(x, rg, lang="en"):
def transform_triples(rg, lang="en"):
    """
    Transforms rdflib triples into plain text.

    :param rg: An RDF Graph
    :param lang: Language of the wikidata article
    :return: List containing the transformed triples
    """
    new_list = [[transform(x, rg, lang) for x in t] for t in tqdm(rg)]
    new_list = [x for x in new_list
                if (not (any([r == [] for r in x]) or any([r is None for r in x])))
                and ("prefLabel" not in x[1])]
    return new_list


def get_wikidata_desc(x):
    """
    Queries wikidata using IDs to retrieve descriptions.

    :param x: List of IDs
    :return: Dictionary mapping input IDs to their wikidata descriptions
    """
    import requests
    output = []
...
...
@@ -61,6 +82,11 @@ def get_wikidata_desc(x):
    return descriptions


def load_mesh():
    """
    Loading the MeSH thesaurus graph as a networkx.DiGraph.