Skip to content
Snippets Groups Projects
functions.sh 3 KiB
Newer Older
Sebastian Hellmann's avatar
Sebastian Hellmann committed
#!/bin/bash


##############
# setup paths
##############

# Absolute directory of this file. BASH_SOURCE is used instead of $0 so the
# path is still resolved from this file's location when the file is sourced.
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Directory holding the extraction.<group>.properties files read by extractDumps.
CONFIGDIR="$ROOT/extractionConfiguration"

# Extraction framework checkout; the functions below cd into its dump/ and
# scripts/ subdirectories and call the ../run launcher from there.
DIEFDIR="$ROOT/marvin-extraction/extraction-framework"

# Per-day log directory (one subdirectory per calendar date), created on load.
# Quoted so that a checkout path containing whitespace is not word-split.
LOGDIR="$ROOT/marvin-extraction/logs/$(date +%Y-%m-%d)"
mkdir -p "$LOGDIR"

# Base directory handed to the post-processing tools, created on load.
EXTRACTIONBASEDIR="$ROOT/marvin-extraction/wikidumps"
mkdir -p "$EXTRACTIONBASEDIR"

# TODO
RELEASEDIR="$ROOT/marvin-extraction/release"
DATABUSDIR="$ROOT/marvin-extraction/databus-maven-plugin"
# mkdir -p "$RELEASEDIR"

##############
# functions
##############

Sebastian Hellmann's avatar
Sebastian Hellmann committed
# download and extract data
extractDumps() {
    # Runs the extraction launcher for the group named in $GROUP.
    #
    # Globals:  GROUP (read)     - extraction group name
    #           DIEFDIR (read)   - extraction framework dir; must contain dump/ and run
    #           CONFIGDIR (read) - dir holding extraction.<group>.properties
    # Outputs:  launcher stdout is redirected to stderr
    # Returns:  1 if GROUP is empty or the dump directory cannot be entered,
    #           otherwise the status of the last launcher invocation
    # Note:     changes the working directory of the calling shell.

    # An empty group would silently build the path "extraction..properties".
    if [ -z "$GROUP" ]
    then
        echo "extractDumps: GROUP is not set" >&2
        return 1
    fi

    # ../run is resolved relative to dump/, so never continue from a wrong cwd.
    cd "$DIEFDIR/dump" || return 1

    # exception for generic, 1. spark, 2. as English is big and has to be run separately
    if [ "$GROUP" = "generic" ]
    then
        >&2 ../run sparkextraction "$CONFIGDIR/extraction.generic.properties"
        >&2 ../run sparkextraction "$CONFIGDIR/extraction.generic.en.properties"
    else
        # run for all
        >&2 ../run extraction "$CONFIGDIR/extraction.$GROUP.properties"
    fi
}
Sebastian Hellmann's avatar
Sebastian Hellmann committed
# post-processing
postProcessing() {
    # Runs the post-processing launcher steps for the group named in $GROUP.
    #
    # Globals:  GROUP (read)             - extraction group name
    #           DIEFDIR (read)           - extraction framework dir; must contain scripts/ and run
    #           EXTRACTIONBASEDIR (read) - base directory passed to the launcher tools
    # Outputs:  "post-processing of <group>" on stdout (and "TODO" for the
    #           abstract group); launcher stdout is redirected to stderr
    # Returns:  1 if the scripts directory cannot be entered; otherwise the
    #           status of the last command run for the group (0 for a group
    #           with no steps)
    # Note:     changes the working directory of the calling shell.

    # TODO move databus scripts in extra function
    #       cd $CONFIGDIR;
    #       source prepareMappingsArtifacts.sh; BASEDIR=$EXTRACTIONBASEDIR; DATABUSMVNPOMDIR=$DATAPUSMAVENPLUGINPOMDIR;
    #      prepareM;

    # ../run is resolved relative to scripts/, so never continue from a wrong cwd.
    cd "$DIEFDIR/scripts" || return 1
    echo "post-processing of $GROUP"

    case "$GROUP" in
        mappings)
            >&2 ../run ResolveTransitiveLinks "$EXTRACTIONBASEDIR" redirects redirects_transitive .ttl.bz2 @downloaded
            >&2 ../run MapObjectUris "$EXTRACTIONBASEDIR" redirects_transitive .ttl.bz2 mappingbased-objects-uncleaned _redirected .ttl.bz2 @downloaded
            >&2 ../run TypeConsistencyCheck type.consistency.check.properties
            ;;
        wikidata)
            # wikidata uses dash-style names and an explicit language argument
            >&2 ../run ResolveTransitiveLinks "$EXTRACTIONBASEDIR" redirects transitive-redirects .ttl.bz2 wikidata
            >&2 ../run MapObjectUris "$EXTRACTIONBASEDIR" transitive-redirects .ttl.bz2 mappingbased-objects-uncleaned,raw -redirected .ttl.bz2 wikidata
            >&2 ../run TypeConsistencyCheck type.consistency.check.properties
            ;;
        generic)
            >&2 ../run ResolveTransitiveLinks "$EXTRACTIONBASEDIR" redirects redirects_transitive .ttl.bz2 @downloaded
            >&2 ../run MapObjectUris "$EXTRACTIONBASEDIR" redirects_transitive .ttl.bz2 disambiguations,infobox-properties,page-links,persondata,topical-concepts _redirected .ttl.bz2 @downloaded
            ;;
        abstract)
            echo "TODO"
            ;;
        test)
            >&2 ../run ResolveTransitiveLinks "$EXTRACTIONBASEDIR" redirects redirects_transitive .ttl.bz2 @downloaded
            >&2 ../run MapObjectUris "$EXTRACTIONBASEDIR" redirects_transitive .ttl.bz2 mappingbased-objects-uncleaned _redirected .ttl.bz2 @downloaded
            >&2 ../run TypeConsistencyCheckManual mappingbased-objects instance-types ro
            ;;
    esac
}

# compress log files
archiveLogFiles() {
	# todo copy to some archive
Sebastian Hellmann's avatar
Sebastian Hellmann committed
    for f in $(find $LOGDIR -type f ); do lbzip2 -f $f; done;
Sebastian Hellmann's avatar
Sebastian Hellmann committed