#!/bin/bash

# Stop at the first command that returns a non-zero status.
set -e

# Absolute path of the directory this script lives in.
SCRIPTROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# [CONFIG]

# Location of the extraction-framework checkout.
EXTRACTIONFRAMEWORKDIR="/home/extractor/extraction-framework"

# Base directory holding the extracted dumps.
BASEDIR="/data/extraction/wikidumps"

# databus-maven-plugin project directory that contains the release pom, see
# https://github.com/dbpedia/databus-maven-plugin/blob/master/dbpedia/generic/pom.xml
DATABUSMAVENPOMDIR="/data/extraction/databus-maven-plugin/dbpedia/generic"

# Values passed to "mvn deploy" to override properties of the release pom.xml.
RELEASEPUBLISHER="https://vehnem.github.io/webid.ttl#this"
RELEASEPACKAGEDIR="/data/extraction/release"
RELEASEDOWNLOADURL="http://dbpedia-generic.tib.eu/release"
RELEASELABELPREFIX="(pre-release)"
RELEASECOMMENTPREFIX="(MARVIN is the DBpedia bot, that runs the DBpedia Information Extraction Framework (DIEF) and releases the data as is, i.e. unparsed, unsorted, not redirected for debugging the software. After its releases, data is cleaned and persisted under the dbpedia account.)"

# Logging directory, one per calendar day; created here if it does not exist.
LOGS="/data/extraction/logs/$(date +%Y-%m-%d)"
mkdir -p "$LOGS"

# [FUNCTIONS]

# Runs a command (normally one of the functions of this script) and captures
# its output in the logging directory.
#   $1     name of the function/command to run; also used as log file basename
#   $2...  optional arguments handed on to that command
# stdout goes to "$LOGS/$1.out" and stderr to "$LOGS/$1.err"; both files are
# truncated on every call.
# Returns the exit status of the command, or 2 when no command name is given.
execWithLogging() {
    if [ "$#" -eq 0 ]; then
        echo "execWithLogging: missing function name" >&2;
        return 2;
    fi
    # "$@" keeps the command name and every argument as separate, unsplit words.
    "$@" > "$LOGS/$1.out" 2> "$LOGS/$1.err";
}

# Runs the "download-ontology" target of the extraction-framework's run script.
# The script is addressed as ../run from the core/ directory of the checkout.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR/core".
# Returns 1 if that directory cannot be entered, otherwise the status of run.
downloadOntology() {
    # Quoted so a checkout path with whitespace is not word-split; the explicit
    # check prevents ../run from being resolved against the wrong directory.
    cd "$EXTRACTIONFRAMEWORKDIR/core" || return 1;
    ../run download-ontology;
}

# Runs the "download-mappings" target of the extraction-framework's run script.
# The script is addressed as ../run from the core/ directory of the checkout.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR/core".
# Returns 1 if that directory cannot be entered, otherwise the status of run.
downloadMappings() {
    # Quoted so a checkout path with whitespace is not word-split; the explicit
    # check prevents ../run from being resolved against the wrong directory.
    cd "$EXTRACTIONFRAMEWORKDIR/core" || return 1;
    ../run download-mappings;
}

# Runs the "download" target of the extraction-framework's run script with the
# download.generic.properties file that sits next to this script.
# The run script is addressed as ../run from the dump/ directory.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR/dump".
# Returns 1 if that directory cannot be entered, otherwise the status of run.
downloadDumps() {
    cd "$EXTRACTIONFRAMEWORKDIR/dump" || return 1;
    # Quoted so the properties path reaches run as a single argument.
    ../run download "$SCRIPTROOT/download.generic.properties";
}

# Builds the extraction-framework by running "mvn clean install" in the root of
# its checkout.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR".
# Returns 1 if that directory cannot be entered, otherwise the status of mvn.
buildExtractionFramework() {
    # Quoted so a checkout path with whitespace is not word-split; without the
    # check a failed cd would let mvn build whatever project is in the cwd.
    cd "$EXTRACTIONFRAMEWORKDIR" || return 1;
    mvn clean install;
}

# Runs the "sparkextraction" target of the extraction-framework's run script
# twice, in this order: first with sparkextraction.generic.properties, then with
# sparkextraction.generic.en.properties (both located next to this script).
# The run script is addressed as ../run from the dump/ directory.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR/dump".
# Returns 1 if that directory cannot be entered, otherwise the status of the
# last run invocation.
runExtraction() {
    cd "$EXTRACTIONFRAMEWORKDIR/dump" || return 1;
    # Quoted so each properties path reaches run as a single argument.
    ../run sparkextraction "$SCRIPTROOT/sparkextraction.generic.properties";
    ../run sparkextraction "$SCRIPTROOT/sparkextraction.generic.en.properties";
}

# Runs the "ResolveTransitiveLinks" target of the extraction-framework's run
# script on the dumps below $BASEDIR.
# The run script is addressed as ../run from the scripts/ directory.
# NOTE(review): the remaining arguments are presumably input dataset
# ("redirects"), output dataset ("redirects_transitive"), file suffix and
# language selector ("@downloaded") - confirm against the framework docs.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR/scripts".
# Returns 1 if that directory cannot be entered, otherwise the status of run.
resolveTransitiveLinks() {
    cd "$EXTRACTIONFRAMEWORKDIR/scripts" || return 1;
    # "$BASEDIR" is quoted so it reaches run as a single argument.
    ../run ResolveTransitiveLinks "$BASEDIR" redirects redirects_transitive .ttl.bz2 @downloaded;
}

# Runs the "MapObjectUris" target of the extraction-framework's run script on
# the dumps below $BASEDIR.
# The run script is addressed as ../run from the scripts/ directory.
# Side effect: leaves the calling shell in "$EXTRACTIONFRAMEWORKDIR/scripts".
# Returns 1 if that directory cannot be entered, otherwise the status of run.
mapObjectUris() {
    cd "$EXTRACTIONFRAMEWORKDIR/scripts" || return 1;
    # Arguments are kept in an array so each stays a single word (notably
    # "$BASEDIR") and the list is readable.
    # NOTE(review): the per-argument meanings below are assumptions derived
    # from the values only - confirm against the framework docs.
    local -a runArgs=(
        "$BASEDIR"              # base directory of the dumps
        redirects_transitive    # presumably the mapping dataset
        .ttl.bz2                # presumably the mapping file suffix
        disambiguations,infobox-properties,page-links,persondata,topical-concepts
        _redirected             # presumably the extension of the output names
        .ttl.bz2                # presumably the input/output file suffix
        @downloaded             # presumably the language selector
    );
    ../run MapObjectUris "${runArgs[@]}";
}

# Post-processing stage: runs resolveTransitiveLinks and then mapObjectUris,
# each through execWithLogging, and announces every step with a timestamped
# progress line on stderr.
postProcessing() {
    # Progress message previously read "resole"; corrected to "resolve".
    echo "$(date) | extraction-framework| resolve transitive links" >&2;
    execWithLogging resolveTransitiveLinks;
    echo "$(date) | extraction-framework| map object uris" >&2;
    execWithLogging mapObjectUris;
}

# Runs collectExtraction.sh (located next to this script) with bash, from the
# script's own directory.
# Side effect: leaves the calling shell in "$SCRIPTROOT".
# Returns 1 if that directory cannot be entered, otherwise the status of
# collectExtraction.sh.
prepareRelease() {
    # collectExtraction.sh brings its own config (per the original note).
    # Quoted so a script directory with whitespace is not word-split; without
    # the check a failed cd would look for the script in the wrong directory.
    cd "$SCRIPTROOT" || return 1;
    bash collectExtraction.sh;
}

# Sets the version of the release pom to the newest version directory found.
# Looks one level below "$DATABUSMAVENPOMDIR" (i.e. <subdir>/<version>) for
# entries named YYYY.MM.DD, takes the lexicographically greatest name and
# passes it to "mvn versions:set -DnewVersion=<version>".
# Side effect: leaves the calling shell in "$DATABUSMAVENPOMDIR".
# Returns 1 if the directory cannot be entered or no version entry exists
# (mvn is not called in that case), otherwise the status of mvn.
setNewVersion() {
    cd "$DATABUSMAVENPOMDIR" || return 1;

    local candidate latest="";
    # A glob replaces parsing of `ls` output; the dots are matched literally.
    for candidate in */[0-9][0-9][0-9][0-9].[0-9][0-9].[0-9][0-9]; do
        # An unmatched glob stays as the literal pattern - skip it.
        [ -e "$candidate" ] || continue;
        candidate="${candidate##*/}";
        # Zero-padded YYYY.MM.DD names order chronologically as strings.
        if [[ "$candidate" > "$latest" ]]; then
            latest="$candidate";
        fi
    done

    if [ -z "$latest" ]; then
        echo "setNewVersion: no YYYY.MM.DD version found below $DATABUSMAVENPOMDIR" >&2;
        return 1;
    fi

    mvn versions:set -DnewVersion="$latest";
}

# Runs "mvn deploy" in the databus-maven-plugin project directory, overriding
# the databus.* properties of the release pom with the RELEASE* settings of
# this script.
# Side effect: leaves the calling shell in "$DATABUSMAVENPOMDIR".
# Returns 1 if that directory cannot be entered, otherwise the status of mvn.
deployRelease() {
    # Quoted so a project path with whitespace is not word-split; without the
    # check a failed cd would deploy whatever project is in the cwd.
    cd "$DATABUSMAVENPOMDIR" || return 1;

    # \${...} is escaped on purpose: the literal ${project.*} placeholders are
    # passed through to Maven instead of being expanded by the shell.
    local -a mvnArgs=(
        deploy
        -Ddatabus.publisher="$RELEASEPUBLISHER"
        -Ddatabus.packageDirectory="$RELEASEPACKAGEDIR/\${project.groupId}/\${project.artifactId}"
        -Ddatabus.downloadUrlPath="$RELEASEDOWNLOADURL/\${project.groupId}/\${project.artifactId}/\${project.version}"
        -Ddatabus.labelPrefix="$RELEASELABELPREFIX"
        -Ddatabus.commentPrefix="$RELEASECOMMENTPREFIX"
    );
    mvn "${mvnArgs[@]}";
}

# Compresses every regular file below "$LOGS" with lbzip2, one file per call.
# Files whose name already ends in .bz2 are left alone, so logs compressed by
# an earlier run on the same day are not fed to lbzip2 a second time.
compressLogs() {
    local logfile;
    # NUL-delimited find output keeps paths with whitespace or glob characters
    # intact (a `for f in $(find ...)` loop would split them).
    while IFS= read -r -d '' logfile; do
        lbzip2 "$logfile";
    done < <(find "$LOGS" -type f ! -name '*.bz2' -print0);
}

# [MAIN]

# Writes one progress line of the form "<date> | <component> | <message>" to
# stderr.
#   $1  component name
#   $2  message text
announceStep() {
    echo "$(date) | $1 | $2" >&2
}

# Runs the whole pipeline in order: downloads, framework build, extraction,
# post-processing, release (collect, set version, deploy) and finally log
# compression. Every step is announced on stderr; all steps except
# postProcessing and compressLogs are wrapped in execWithLogging.
main() {
    local rule="--------------------"
    printf '%s\n' "$rule" " Generic Extraction " "$rule" >&2

    # --- download ---
    announceStep "extraction-framework" "start download ontology"
    execWithLogging downloadOntology
    announceStep "extraction-framework" "start download mappings"
    execWithLogging downloadMappings
    announceStep "extraction-framework" "start download dumps"
    execWithLogging downloadDumps

    # --- extraction ---
    announceStep "extraction-framework" "mvn clean install"
    execWithLogging buildExtractionFramework
    announceStep "extraction-framework" "start extraction"
    execWithLogging runExtraction
    announceStep "extraction-framework" "post processing"
    postProcessing

    # --- release ---
    announceStep "databus-maven-plugin" "collect extracted datasets"
    execWithLogging prepareRelease
    announceStep "databus-maven-plugin" "mvn versions:set"
    execWithLogging setNewVersion
    announceStep "databus-maven-plugin" "mvn deploy"
    execWithLogging deployRelease

    # --- cleanup ---
    announceStep "main" "compress log files"
    compressLogs
}

# Single-instance guard and launcher.
# The pipeline is started in a background subshell unless
# "$SCRIPTROOT/generic-release.pid" already exists; in that case nothing is
# started.
# The subshell records its own PID in that file and removes the file from an
# EXIT trap. Because of `set -e` the subshell terminates as soon as main fails,
# so a plain `rm` placed after main would be skipped and the left-over pid file
# would block every later run.
pidFile="$SCRIPTROOT/generic-release.pid";
if [ ! -f "$pidFile" ]; then
    (
        # Installed first so the file is removed on every exit path.
        trap 'rm -f -- "$pidFile"' EXIT;
        # BASHPID is the PID of this subshell ($$ would name the parent script).
        # Writing it here, before main starts, avoids the parent re-creating
        # the file after a very short-lived run has already cleaned up.
        echo "$BASHPID" > "$pidFile";
        execWithLogging main;
    ) &
fi