From 0932ec42759afe4b417c41d33cd7f73ed53dd96b Mon Sep 17 00:00:00 2001 From: Marvin Hofer <vehnem@yahoo.de> Date: Tue, 3 Sep 2019 19:13:39 +0200 Subject: [PATCH] generic-release.sh refactor --- generic/schedule/collectExtraction.sh | 133 ++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 generic/schedule/collectExtraction.sh diff --git a/generic/schedule/collectExtraction.sh b/generic/schedule/collectExtraction.sh new file mode 100644 index 0000000..74e01f6 --- /dev/null +++ b/generic/schedule/collectExtraction.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +set -e + +# directory of extracted dumps. (basedir) +BASEDIR="/data/extraction/wikidumps/" + +# databus-maven-plugin directory +DATABUSMVNDIR="/data/extraction/databus-maven-plugin/dbpedia/generic" + +# databus version. empty for all +DUMPDATE= + +# if true, just show what will happen +TRYRUN=false + +#__Maps__ + +mapLang() { + + lang=$(echo "$1" | sed 's|wiki||g') + + case "$lang" in + + "bat_smg") echo "_lang=batsmg";; + "zh_min_nan") echo "_lang=nan";; + "zh_yue") echo "_lang=yue";; + + "wikidata") echo "";; + + *) echo "_lang=$lang";; + esac +} + +mapExtraction() { + + case "$1" in + "article-templates-nested") echo "article-templates_nested";; + + "citation-data") echo "citations_data";; + "citation-links") echo "citations_links";; + + "commons-page-links") echo "commons-sameas-links";; + + "page-ids") echo "page_ids";; + "page-length") echo "page_length";; + "page-links") echo "wikilinks";; + + "article-categories") echo "categories_articles";; + "category-labels") echo "categories_labels";; + "skos-categories") echo "categories_skos";; + + "revision-ids") echo "revisions_ids";; + "revision-uris") echo "revisions_uris";; + + *) echo "$1";; + esac +} + +#__Functions__ + +printStart() { + + >&2 echo "-----------------" + >&2 echo " Generic Release " + >&2 echo "-----------------" +} + +copyToMavenPlugin() { + + # https://www.tldp.org/LDP/abs/html/string-manipulation.html#Substring%20Removal#Substring Removal + # ${string##/*} + + for path in $(find "$BASEDIR" -name "*.ttl.bz2"); do + + file="${path##*/}" + + version="${file#*-}" + version="${version%%-*}" + version="${version:0:4}.${version:4:2}.${version:6:2}" + + if [ "$DUMPDATE" = "$version" ] || [ -z "$DUMPDATE" ] ; then + + lang="${file%%-*}" + + extraction="${file#*-*-}" + extraction="${extraction%%.*}" + + extension="${file#*.}" + + mapped="$(mapExtraction $extraction)" + + artifact="${mapped%%_*}" + + contVars="$(mapLang $lang)" + if [[ "$mapped" == *"_"* ]]; then + contVars="${contVars}_${mapped#*_}" + fi + + targetArVe="$artifact/$version" + targetFile="$artifact$contVars.$extension" + + if [ -d "$DATABUSMVNDIR/$artifact" ]; then + + if [ ! -d "$DATABUSMVNDIR/$targetArVe" ]; then + + mkdir -p "$DATABUSMVNDIR/$targetArVe" + fi + + if $TRYRUN; then + echo "$path -> $DATABUSMVNDIR/$targetArVe/$targetFile" + else + cp -vn "$path" "$DATABUSMVNDIR/$targetArVe/$targetFile" + fi + else + + >&2 echo "unmapped/notexist artifact: $artifact" + fi + fi + done +} + + +#__Main__ + +main() { + + printStart + copyToMavenPlugin + + #TODO add release +} +main -- GitLab