Skip to content
Snippets Groups Projects
Commit d3abe453 authored by kurzum
Browse files

post processing clean up

parent c5f8a99f
No related branches found
No related tags found
No related merge requests found
...@@ -32,42 +32,42 @@ extractDumps() { ...@@ -32,42 +32,42 @@ extractDumps() {
} }
# post-processing, see http://dev.dbpedia.org/Post-Processing
#
# Runs the post-processing steps for the extraction group named by $GROUP.
#
# Globals read:    DIEFDIR, EXTRACTIONBASEDIR, GROUP, LOGDIR
# Globals written: DATASETS (comma-separated dataset list handed to
#                  MapObjectUris; reset to empty on every call)
# Returns:         1 if $DIEFDIR/scripts cannot be entered, otherwise the
#                  status of the last command executed.
#
# NOTE(review): uses bash features (arrays, process substitution, read -d);
# confirm the enclosing script runs under bash.
postProcessing() {
    local redirected_files=() redirected_file

    # ../run is resolved relative to the scripts directory, so a failed cd
    # must abort instead of running whatever "../run" happens to be elsewhere.
    cd "$DIEFDIR/scripts" || {
        echo "cannot change directory to $DIEFDIR/scripts" >&2
        return 1
    }
    echo "post-processing of $GROUP";

    # resolve transitive links for all, affects the 'redirects' dataset
    # TODO ResolveTransitiveLinks can take a wikidata interlanguage link parameter, that helps to sort the redirects
    >&2 ../run ResolveTransitiveLinks "$EXTRACTIONBASEDIR" redirects redirects_transitive .ttl.bz2 @downloaded

    # Datasets for MapObjectURIs.
    # Reset first so a value left over from an earlier call (or from the
    # environment) is never reused for a group that defines no datasets.
    DATASETS=""
    case "$GROUP" in
        mappings|test)
            DATASETS="mappingbased-objects-uncleaned"
            ;;
        wikidata)
            DATASETS="mappingbased-objects-uncleaned,raw"
            ;;
        generic|generic.en|sparktestgeneric)
            DATASETS="disambiguations,infobox-properties,page-links,persondata,topical-concepts"
            ;;
    esac

    # run MapObjectUris, but only when the group has datasets: an empty,
    # unquoted list would disappear from the command line and shift every
    # following positional argument.
    if [ -n "$DATASETS" ]
    then
        >&2 ../run MapObjectUris "$EXTRACTIONBASEDIR" redirects_transitive .ttl.bz2 "$DATASETS" _redirected .ttl.bz2 @downloaded
    fi

    # Datasets with TypeConsistencyCheck
    case "$GROUP" in
        mappings|test|wikidata|generic|generic.en|sparktestgeneric)
            >&2 ../run TypeConsistencyCheck type.consistency.check.properties
            ;;
    esac

    # Handling of redirects, i.e. copy to log and rename old.
    # -p keeps a re-run from failing when the directory already exists.
    mkdir -p "$LOGDIR/redirected"

    # Collect the matches before touching anything so that the copies written
    # below can never be picked up by the same search; NUL-delimited so paths
    # containing whitespace survive intact.
    while IFS= read -r -d '' redirected_file; do
        redirected_files+=("$redirected_file")
    done < <(find "$EXTRACTIONBASEDIR" -name "*_redirected.ttl.bz2" -print0)

    for redirected_file in "${redirected_files[@]}"; do
        cp -- "$redirected_file" "$LOGDIR/redirected"
        # Strip the "_redirected" marker from the file-name suffix, replacing
        # any existing file of the plain name (same effect as 'rename -f',
        # without depending on the Perl rename utility).
        mv -f -- "$redirected_file" "${redirected_file%_redirected.ttl.bz2}.ttl.bz2"
    done
}
# compress log files
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment