Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
asv-ml
mlmc
Commits
6655fa0e
Commit
6655fa0e
authored
Mar 09, 2020
by
Janos Borst
Browse files
master merge
parent
4bd340a6
Pipeline
#36804
passed with stages
in 2 minutes and 55 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
mlmc/models/ZAGCNNLMAtt.py
View file @
6655fa0e
...
...
@@ -78,7 +78,7 @@ class ZAGCNNLM2(TextClassificationAbstract):
from
..representation
import
get_word_embedding_mean
l
=
get_word_embedding_mean
(
[
" "
.
join
(
re
.
split
(
"[/ _-]"
,
x
.
lower
()))
for
x
in
self
.
classes
.
keys
()],
"
/disk1/users/jborst/Data/Embeddings/glove/en/glove.6B.300d_small.txt
"
)
"
glove300
"
)
if
scale
==
"mean"
:
print
(
"subtracting mean"
)
...
...
mlmc/representation/representations.py
View file @
6655fa0e
import
numpy
as
np
from
transformers
import
*
import
torch
from
pathlib
import
Path
from
urllib
import
error
from
urllib.request
import
urlopen
from
io
import
BytesIO
from
zipfile
import
ZipFile
EMBEDDINGCACHE
=
Path
.
home
()
/
".mlmc"
/
"embedding"
MODELS
=
{
"bert"
:
(
BertModel
,
BertTokenizer
,
'bert-large-uncased'
),
"bert_cased"
:
(
BertModel
,
BertTokenizer
,
'bert-base-cased'
),
...
...
@@ -16,8 +24,26 @@ MODELS = {"bert": (BertModel, BertTokenizer, 'bert-large-uncased'),
def
load_static
(
embedding
=
"/disk1/users/jborst/Data/Embeddings/glove/en/glove.6B.50d_small.txt"
):
glove
=
np
.
loadtxt
(
embedding
,
dtype
=
'str'
,
comments
=
None
)
def
load_static
(
embedding
=
"glove300"
):
embeddingfiles
=
{
"glove50"
:
"glove.6B.50d.txt"
,
"glove100"
:
"glove.6B.100d.txt"
,
"glove200"
:
"glove.6B.200d.txt"
,
"glove300"
:
"glove.6B.300d.txt"
}
if
not
(
EMBEDDINGCACHE
/
embeddingfiles
[
embedding
]).
exists
():
URL
=
"http://nlp.stanford.edu/data/glove.6B.zip"
try
:
resp
=
urlopen
(
URL
)
except
error
.
HTTPError
:
print
(
error
.
HTTPError
)
return
None
assert
resp
.
getcode
()
==
200
,
"Download not found Error: (%i)"
%
(
resp
.
getcode
(),)
print
(
"Downloading glove vectors... This may take a while..."
)
zipfile
=
ZipFile
(
BytesIO
(
resp
.
read
()))
zipfile
.
extractall
(
EMBEDDINGCACHE
)
fp
=
EMBEDDINGCACHE
/
embeddingfiles
[
embedding
]
glove
=
np
.
loadtxt
(
fp
,
dtype
=
'str'
,
comments
=
None
)
glove
=
glove
[
np
.
unique
(
glove
[:,:
1
],
axis
=
0
,
return_index
=
True
)[
1
]]
words
=
glove
[:,
0
]
weights
=
glove
[:,
1
:].
astype
(
'float'
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment