Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
mappings-release.sh 3.82 KiB
#!/bin/bash

# Abort the whole run as soon as any unchecked command fails.
set -e

# Absolute path of the directory this script lives in; the *.properties files
# passed to the extraction framework are looked up relative to it.
SCRIPTROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# [CONFIG]

# Checkout of the extraction-framework (contains the `run` launcher script
# and the core/, dump/ and scripts/ modules used below).
EXTRACTIONFRAMEWORKDIR="/home/extractor/extraction-framework";

# Base directory of the extracted dumps.
BASEDIR="/data/extraction/wikidumps";

# databus-maven-plugin project containing the release pom, see
# https://github.com/dbpedia/databus-maven-plugin/blob/master/dbpedia/mappings/pom.xml
# NOTE(review): the URL above references the "mappings" pom while the path
# below points at "generic" - confirm which one this release should use.
DATABUSMAVENPOMDIR="/data/extraction/databus-maven-plugin/dbpedia/generic";

# Values that override properties of the release pom.xml at deploy time.
RELEASEPUBLISHER="https://vehnem.github.io/webid.ttl#this";
RELEASEPACKAGEDIR="/data/extraction/release";
RELEASEDOWNLOADURL="http://dbpedia-mappings.tib.eu/release";

# Per-day logging directory; the date is evaluated once at start-up.
LOGS="/data/extraction/logs/$(date +%Y-%m-%d)";
mkdir -p "$LOGS";

# [FUNCTIONS]

# Run a function/command and capture its output in the logging directory.
#   $1      name of the function or command to run; also used as the base
#           name of the log files
#   $2...   optional arguments forwarded unchanged to $1
# Writes stdout to "$LOGS/$1.out" and stderr to "$LOGS/$1.err".
# Returns the exit status of $1, or 2 when no name was given.
execWithLogging() {
    if (( $# == 0 )) || [[ -z "$1" ]]; then
        # Without a name the redirections would still create ".out"/".err"
        # while running nothing at all.
        echo "execWithLogging: missing function name" >&2
        return 2
    fi

    local fn="$1"
    shift
    "$fn" "$@" > "$LOGS/$fn.out" 2> "$LOGS/$fn.err"
}

# Run the framework launcher with `download-ontology` from the core module.
# Globals: EXTRACTIONFRAMEWORKDIR (read)
# Returns: non-zero if the core directory cannot be entered, otherwise the
#          exit status of the launcher.
downloadOntology() {
    # Stop here if the directory is missing instead of launching ../run
    # relative to whatever the current directory happens to be.
    cd "$EXTRACTIONFRAMEWORKDIR/core" || return
    ../run download-ontology
}

# Run the framework launcher with `download-mappings` from the core module.
# Globals: EXTRACTIONFRAMEWORKDIR (read)
# Returns: non-zero if the core directory cannot be entered, otherwise the
#          exit status of the launcher.
downloadMappings() {
    # Quoted so a path containing whitespace is not split; checked so the
    # launcher is never started from the wrong directory.
    cd "$EXTRACTIONFRAMEWORKDIR/core" || return
    ../run download-mappings
}

# Run the framework launcher with `download` from the dump module, using the
# download.generic.properties file that sits next to this script.
# Globals: EXTRACTIONFRAMEWORKDIR (read), SCRIPTROOT (read)
# Returns: non-zero if the dump directory cannot be entered, otherwise the
#          exit status of the launcher.
downloadDumps() {
    cd "$EXTRACTIONFRAMEWORKDIR/dump" || return
    # The properties path is passed as a single argument even if SCRIPTROOT
    # contains whitespace.
    ../run download "$SCRIPTROOT/download.generic.properties"
}

# Build the extraction framework with `mvn clean install`.
# Globals: EXTRACTIONFRAMEWORKDIR (read)
# Returns: non-zero if the framework directory cannot be entered, otherwise
#          the exit status of mvn.
buildExtractionFramework() {
    # Never start a Maven build in an unrelated working directory.
    cd "$EXTRACTIONFRAMEWORKDIR" || return
    mvn clean install
}

# Run the framework launcher with `sparkextraction` from the dump module,
# using the extraction.mappings.properties file that sits next to this script.
# Globals: EXTRACTIONFRAMEWORKDIR (read), SCRIPTROOT (read)
# Returns: non-zero if the dump directory cannot be entered, otherwise the
#          exit status of the launcher.
runExtraction() {
    cd "$EXTRACTIONFRAMEWORKDIR/dump" || return
    ../run sparkextraction "$SCRIPTROOT/extraction.mappings.properties"
}

# Run the framework's ResolveTransitiveLinks script over the dumps below
# BASEDIR (arguments: redirects -> redirects_transitive, suffix .ttl.bz2,
# language selector @downloaded).
# Globals: EXTRACTIONFRAMEWORKDIR (read), BASEDIR (read)
# Returns: non-zero if the scripts directory cannot be entered, otherwise the
#          exit status of the launcher.
resolveTransitiveLinks() {
    cd "$EXTRACTIONFRAMEWORKDIR/scripts" || return
    ../run ResolveTransitiveLinks "$BASEDIR" redirects redirects_transitive .ttl.bz2 @downloaded
}

# Run the framework's MapObjectUris script over the dumps below BASEDIR,
# mapping the listed datasets through redirects_transitive and writing them
# with the _redirected suffix (argument meaning is defined by the framework).
# Globals: EXTRACTIONFRAMEWORKDIR (read), BASEDIR (read)
# Returns: non-zero if the scripts directory cannot be entered, otherwise the
#          exit status of the launcher.
mapObjectUris() {
    cd "$EXTRACTIONFRAMEWORKDIR/scripts" || return
    ../run MapObjectUris "$BASEDIR" redirects_transitive .ttl.bz2 \
        disambiguations,infobox-properties,page-links,persondata,topical-concepts \
        _redirected .ttl.bz2 @downloaded
}

# Post-process the extraction: resolve transitive redirects first, then map
# object URIs. Each step is announced on stderr and run through
# execWithLogging, so its output lands in its own pair of log files.
postProcessing() {
    echo "$(date) | extraction-framework| resolve transitive links" >&2
    execWithLogging resolveTransitiveLinks
    echo "$(date) | extraction-framework| map object uris" >&2
    execWithLogging mapObjectUris
}

# Run collectExtraction.sh from the script directory (it uses its own config).
# Globals: SCRIPTROOT (read)
# Returns: non-zero if SCRIPTROOT cannot be entered, otherwise the exit status
#          of collectExtraction.sh.
prepareRelease() {
    #own config
    cd "$SCRIPTROOT" || return

    # A bare command name is only searched on PATH, never in the current
    # directory. Keep using a PATH installation when there is one, otherwise
    # fall back to the copy that sits next to this script.
    if command -v collectExtraction.sh >/dev/null 2>&1; then
        collectExtraction.sh
    else
        ./collectExtraction.sh
    fi
}

# Set the version of the release pom to the newest version name found one
# level below the entries of DATABUSMAVENPOMDIR (presumably laid out as
# <artifact>/<version> - confirm against the databus project layout).
# Globals: DATABUSMAVENPOMDIR (read)
# Returns: non-zero if the directory cannot be entered or no version entry
#          exists, otherwise the exit status of `mvn versions:set`.
setNewVersion() {
    cd "$DATABUSMAVENPOMDIR" || return

    # NNNN?NN?NN - '.' matches any single character, exactly as in the
    # previous grep pattern.
    # NOTE(review): tighten to '\.' if only dotted versions are valid.
    local -r versionPattern='^[0-9]{4}.[0-9]{2}.[0-9]{2}$'
    local entry name
    local -a versions=()

    # Glob instead of parsing `ls` output; an unmatched glob stays literal
    # and simply fails the pattern check.
    for entry in */*; do
        name="${entry##*/}"
        if [[ "$name" =~ $versionPattern ]]; then
            versions+=("$name")
        fi
    done

    if (( ${#versions[@]} == 0 )); then
        # Previously this case called mvn with an empty -DnewVersion=.
        echo "setNewVersion: no version entry found below $DATABUSMAVENPOMDIR" >&2
        return 1
    fi

    local newest
    newest="$(printf '%s\n' "${versions[@]}" | sort -u | tail -n 1)"
    mvn versions:set -DnewVersion="$newest"
}

# Run `mvn deploy` in the release pom directory, overriding the publisher,
# package directory and download URL properties with the values configured
# at the top of this script.
# Globals: DATABUSMAVENPOMDIR, RELEASEPUBLISHER, RELEASEPACKAGEDIR,
#          RELEASEDOWNLOADURL (all read)
# Returns: non-zero if the pom directory cannot be entered, otherwise the
#          exit status of mvn.
deployRelease() {
    cd "$DATABUSMAVENPOMDIR" || return

    # Single-quoted so the shell hands the ${project.*} placeholders to mvn
    # unexpanded (presumably resolved by Maven per module).
    local -r artifactPath='${project.groupId}/${project.artifactId}'

    mvn deploy \
        -Ddatabus.publisher="$RELEASEPUBLISHER" \
        -Ddatabus.packageDirectory="$RELEASEPACKAGEDIR/$artifactPath" \
        -Ddatabus.downloadUrlPath="$RELEASEDOWNLOADURL/$artifactPath/"'${project.version}'
}

# Compress every regular file below the logging directory with lbzip2.
# Globals: LOGS (read)
# Files already ending in .bz2 are skipped, so running the script again on
# the same day does not hand previously compressed logs to lbzip2 once more.
compressLogs() {
    local logFile
    # NUL-delimited find output keeps paths containing whitespace intact.
    # The list is read from fd 3 so the compressor's stdin is left untouched.
    while IFS= read -r -d '' -u 3 logFile; do
        lbzip2 "$logFile"
    done 3< <(find "$LOGS" -type f ! -name '*.bz2' -print0)
}

# [MAIN]

# Drive the complete release: download inputs, build and run the extraction,
# publish the result via the databus-maven-plugin, then compress the logs.
# Every step is announced on stderr and executed through execWithLogging.
main() {
    local -r rule="-------------------------"
    printf '%s\n' "$rule" " Mappingbased Extraction " "$rule" >&2

    # One entry per step, in execution order: component|message|function
    local -a pipeline=(
        #download
        "extraction-framework|start download ontology|downloadOntology"
        "extraction-framework|start download mappings|downloadMappings"
        "extraction-framework|start download dumps|downloadDumps"
        #extraction
        "extraction-framework|mvn clean install|buildExtractionFramework"
        "extraction-framework|start extraction|runExtraction"
        # post processing (postProcessing) is currently disabled
        #release
        "databus-maven-plugin|collect extracted datasets|prepareRelease"
        "databus-maven-plugin|mvn versions:set|setNewVersion"
        "databus-maven-plugin|mvn deploy|deployRelease"
    )

    local entry component message task
    for entry in "${pipeline[@]}"; do
        IFS='|' read -r component message task <<< "$entry"
        printf '%s\n' "$(date) | $component | $message" >&2
        execWithLogging "$task"
    done

    #cleanup
    # NOTE(review): when main itself is started through execWithLogging, its
    # own .out/.err files are still open while compressLogs runs.
    printf '%s\n' "$(date) | main | compress log files" >&2
    compressLogs
}

# Entry point: run the whole pipeline; main's own stdout/stderr are written
# to "$LOGS/main.out" and "$LOGS/main.err".
execWithLogging main