Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
# Copies extracted Wikidata dump files (*.ttl.bz2) into the directory layout
# of the databus-maven-plugin project: <pom dir>/<dataset>/<version>/<file>.
set -e
# [CONFIG]
# extracted dumps (basedir)
BASEDIR="/data/extraction/wikidumps/"
# databus-maven-plugin project, containing release pom
DATABUSMVNPOMDIR="/data/extraction/databus-maven-plugin/dbpedia/wikidata"
# explicit databus version or empty for all
# NOTE(review): not referenced by any code visible in this script - confirm
DUMPDATE=
# if true show dummy output
# NOTE(review): not referenced by any code visible in this script - confirm
TRYRUN=false
# [TODO]
echo "----------------------------"
echo "Prepare Wikidata for Databus"
echo "----------------------------"
# Every path collected below is relative to BASEDIR. Quoted so a base
# directory containing spaces or glob characters is passed through intact.
cd "$BASEDIR"
# Newline-separated, sorted and de-duplicated list of all *.ttl.bz2 files
# found below wikidatawiki/.
files=$(find wikidatawiki -name "*.ttl.bz2" | sort -u)
# Print the dataset name to use for the raw dataset name given as $1.
# The three debugging datasets are all mapped to "debug"; any other
# value is printed unchanged.
function exceptDataset {
    local mapped="$1"
    case "$1" in
        duplicate-iri-split | r2r-mapping-errors | type-like-statements)
            mapped="debug"
            ;;
    esac
    echo "$mapped"
}
# Print the file base name to use for the name given as $1.
# The three debugging datasets become "debug_" followed by the original
# name with its hyphens removed; any other value is printed unchanged.
function exceptName {
    case "$1" in
        duplicate-iri-split | r2r-mapping-errors | type-like-statements)
            echo "debug_${1//-/}"
            ;;
        *)
            echo "$1"
            ;;
    esac
}
# Compute where one extracted dump file belongs below the Databus pom dir.
# Arguments: $1 - dump file path of the form <...>/<yyyymmdd>/<name>.<ext...>
# Outputs:   "<dataset>/<version>/<new file name>" on stdout
# Calls exceptDataset and exceptName, which are defined earlier in this script.
function databusTargetPath {
    local file="$1"
    local base="${file##*/}"
    # name: basename up to the first dot; ext: everything from that dot on
    local name="${base%%.*}"
    local ext="${base#"$name"}"
    # the parent directory name is the dump date (yyyymmdd) -> yyyy.mm.dd
    local dumpVersion="${file%/*}"
    dumpVersion="${dumpVersion##*/}"
    local version="${dumpVersion:0:4}.${dumpVersion:4:2}.${dumpVersion:6:2}"

    # Collect one "_<tag>" per variant marker found in the name, in this
    # fixed order. "-reified-qualifiers" also matches "-reified", so such a
    # file gets "_reified_qualifiers"; the prefix strip below turns the
    # "reified-qualifiers" tag into just "_qualifiers".
    local CONTVAR="" tag
    for tag in nmw reified reified-qualifiers redirected length ids uris transitive; do
        if [[ "$name" == *"-${tag}"* ]]; then
            CONTVAR+="_${tag#reified-}"
        fi
    done

    # Strip the wiki/date prefix and all variant markers to get the dataset.
    local dataset
    dataset=$(sed -e "s/wikidatawiki-$dumpVersion-//g; s/-nmw//g; s/wikidata-//g; s/-reified//g; s/-qualifiers//g; s/-redirected//g; s/-ids//g; s/-length//g; s/-uris//g; s/-transitive//g; s/transitive-//g" <<< "$name")
    local new_name="${dataset}${CONTVAR}"

    # Per-language interlanguage link files share one dataset; the language
    # goes into the file name as "lang=<code>" and the variant tags are dropped.
    # NOTE(review): "interlanguange" looks like a misspelling of
    # "interlanguage", but it is part of the generated paths - confirm against
    # the existing Databus artifact name before changing it.
    if [[ "$dataset" == *"interlanguage-links"* ]]; then
        new_name="interlanguange-links_lang=${dataset//interlanguage-links-/}"
        dataset="interlanguange-links"
    fi

    dataset=$(exceptDataset "$dataset")
    new_name=$(exceptName "$new_name")
    printf '%s\n' "$dataset/$version/$new_name$ext"
}

# Copy every discovered dump file to its Databus location.
# $files is read line by line, so paths containing spaces or glob characters
# are not split or expanded.
while IFS= read -r file; do
    # an empty $files still yields one empty line from the here-string
    [[ -n "$file" ]] || continue
    target="$DATABUSMVNPOMDIR/$(databusTargetPath "$file")"
    mkdir -p -- "${target%/*}"
    # Never overwrite an existing file. Checked explicitly instead of using
    # `cp -n`: some GNU coreutils releases exit non-zero when -n skips a file,
    # which under `set -e` would abort a re-run at the first existing target.
    if [[ -e "$target" ]]; then
        echo "skipping existing file: $target" >&2
        continue
    fi
    cp -v -- "$file" "$target"
done <<< "$files"