Skip to content
Snippets Groups Projects
Commit 20d6c734 authored by Your Name's avatar Your Name
Browse files

mappings init

parent d2921b77
No related branches found
No related tags found
No related merge requests found
# Edit this file to introduce tasks to be run by cron.
#
# Each task to run has to be defined through a single line
# indicating with different fields when the task will be run
# and what command to run for the task
#
# To define the time you can provide concrete values for
# minute (m), hour (h), day of month (dom), month (mon),
# and day of week (dow) or use '*' in these fields (for 'any').
#
# Notice that tasks will be started based on the cron's system
# daemon's notion of time and timezones.
#
# Output of the crontab jobs (including errors) is sent through
# email to the user the crontab file belongs to (unless redirected).
#
# For example, you can run a backup of all your user accounts
# at 5 a.m every week with:
# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
#
# For more information see the manual pages of crontab(5) and cron(8)
#
# m h dom mon dow command
# Runs at 00:00 on the 7th day of every month; stdout and stderr are discarded,
# so cron sends no mail for this job.
0 0 7 * * /bin/bash -c '/home/extractor/schedule/run-extraction.sh' >/dev/null 2>&1
#!/bin/bash
# Thin wrapper: invokes the extraction script by its absolute path.
/home/extractor/schedule/run-extraction.sh
#!/bin/bash
# Root directory that is searched for extracted dump files.
extraction_data='/data/extraction/wikidumps/'
# Root of the databus-maven-plugin tree that receives the prepared artifacts.
databus_folder='/data/extraction/databus-maven-plugin/dbpedia/'
# Optional path segment placed between the artifact and the version directory.
#databus_maven_plugin_structure="src/main/databus"
databus_maven_plugin_structure=''
# Artifact selection: @ALL, @GENERIC, @MAPPINGS, or artifact names separated by spaces.
artifacts='@MAPPINGS'
# Suffix pattern appended to the find -name expression when collecting files.
filter_extension='.ttl.*'
# Group directory below $databus_folder.
group='mappings'
# "true" sends every file through the rapper/sort pipeline; anything else copies it with pv.
rapper='false'
# Scratch area for named pipes, sort temp files and intermediate output.
tmp_folder="${databus_folder}/tmp/"
# --- FUNC ---
# Translates a wiki name prefix into its content-variant token.
# zh_yue -> lang=yue, zh_min_nan -> lang=nan, bat_smg -> lang=batsmg;
# "commons" is emitted as-is (no "lang=" prefix); anything else -> lang=<prefix>.
# Arguments: $1 - wiki name prefix
# Outputs:   the content-variant token on stdout
function name_to_variant {
  local wiki_prefix=$1
  local language
  case "$wiki_prefix" in
    commons)
      echo "commons"
      return
      ;;
    zh_yue)     language="yue" ;;
    zh_min_nan) language="nan" ;;
    bat_smg)    language="batsmg" ;;
    *)          language=$wiki_prefix ;;
  esac
  echo "lang=$language"
}
# Maps an extraction artifact name to the databus artifact it is published in:
# instance-types-transitive is merged into instance-types, every other name is
# returned unchanged.
# Arguments: $1 - extraction artifact name
# Outputs:   the target artifact name on stdout
function merge_to_artifact {
  case "$1" in
    "instance-types-transitive") printf '%s\n' "instance-types" ;;
    # printf with a quoted argument keeps the name byte-for-byte (no word
    # splitting, no globbing, no echo option parsing).
    *) printf '%s\n' "$1" ;;
  esac
}
# Emits the extra content-variant suffix for an artifact: "_transitive" for
# instance-types-transitive, an empty line for everything else.
# Arguments: $1 - extraction artifact name
function additional_content_variants {
  if [[ "$1" == "instance-types-transitive" ]]; then
    echo "_transitive"
  else
    echo ""
  fi
}
# Transfers one extracted dump file to its databus target path.
#
# With rapper=true the file is decompressed, parsed by rapper, passed through
# ascii2uni, sorted/deduplicated and recompressed; stderr of rapper and
# ascii2uni is collected through a named pipe into a compressed debug file.
# Otherwise the file is streamed to the target with pv.
#
# Globals (read): rapper, databus_folder, databus_maven_plugin_structure,
#                 tmp_folder, plus artifact / version / content_variant as set
#                 by the caller.
# Arguments: $1 - source file, $2 - target file
function process_file {
  echo "processing: $1"

  if [ "$rapper" != true ]; then
    #cp -vn $1 $2
    pv "$1" > "$2"
    return
  fi

  local target_artifact debug_dir work_dir debug_pipe debug_file_path tmpfile pipeline_pid
  target_artifact="$(merge_to_artifact "$artifact")"

  # debug-artifact directory and scratch directories; mkdir -p is idempotent,
  # so no separate existence checks are needed.
  debug_dir="$databus_folder/debug/debug-$target_artifact/$databus_maven_plugin_structure/$version/"
  work_dir="$tmp_folder/$target_artifact/$version/"
  mkdir -p "$debug_dir" "${work_dir}sort/"

  debug_pipe="${work_dir}${content_variant}_debug.pipe"
  # -p (not -f): a pipe left behind by an aborted run must be reused, not re-created.
  if [ ! -p "$debug_pipe" ]; then
    mkfifo "$debug_pipe"
  fi

  debug_file_path="${debug_dir}debug-${target_artifact}_${content_variant}_debug=rapper.bz2"
  tmpfile="${work_dir}${content_variant}"

  # The conversion runs in the background because its stderr writers block
  # until the foreground reader below opens the pipe.
  lbzip2 -dc "$1" \
    | rapper -i ntriples -O - - file 2>>"$debug_pipe" \
    | ascii2uni -a U 2>>"$debug_pipe" \
    | LC_ALL=C sort --parallel=4 -u -T "${work_dir}sort/" \
    | lbzip2 > "$tmpfile" &
  pipeline_pid=$!

  lbzip2 -c < "$debug_pipe" > "$debug_file_path"

  # The debug reader finishes as soon as rapper/ascii2uni close the pipe, but
  # sort and the final compressor may still be writing: wait for them so the
  # published file is complete.
  wait "$pipeline_pid"

  mv "$tmpfile" "$2"
  rm "$debug_pipe"
}
# Collects the extracted files of the given artifacts and places them in the
# databus folder layout: <databus_folder>/<group>/<artifact>/<structure>/<version>/.
# The version (YYYY.MM.DD) is derived from the name of the directory holding
# the file, the content variant from the part of the file name before "wiki-".
# Files whose target already exists are skipped.
#
# Globals (read): extraction_data, filter_extension, databus_folder, group,
#                 databus_maven_plugin_structure
# Arguments: artifact names, either as separate arguments or as one
#            space-separated string (both forms are accepted)
function prepare_databus_artifacts {
  echo "prepare: $*"
  # artifact, version and content_variant are read by process_file; bash's
  # dynamic scoping makes these locals visible to it.
  local artifact file_path file dump_version version content_variant
  local target_artifact target_dir new_file new_file_path

  # Deliberately unquoted: splits every argument into individual artifact names.
  # shellcheck disable=SC2048
  for artifact in $*; do
    target_artifact="$(merge_to_artifact "$artifact")"

    # NUL-delimited find output on fd 3 keeps paths with whitespace intact and
    # leaves stdin untouched for the commands run inside the loop.
    while IFS= read -r -d '' file_path <&3; do
      #echo "preparing: $file_path"
      file="${file_path##*/}"
      dump_version="${file_path%/*}"
      dump_version="${dump_version##*/}"
      version="${dump_version:0:4}.${dump_version:4:2}.${dump_version:6:2}"
      content_variant="$(name_to_variant "${file%%wiki-*}")$(additional_content_variants "$artifact")"
      new_file="${target_artifact}_$content_variant.${file#*.}"

      # folder check
      target_dir="$databus_folder/$group/$target_artifact/$databus_maven_plugin_structure/$version"
      mkdir -p "$target_dir"

      new_file_path="$target_dir/$new_file"
      if [ -f "$new_file_path" ]; then
        echo "skipping $file_path"
      else
        process_file "$file_path" "$new_file_path"
      fi
    done 3< <(find "$extraction_data" -name "*[0-9]-$artifact$filter_extension" -print0)

    echo "artifact: $artifact"
  done
}
# --- MAIN ---
# Dispatch on the configured artifact selection.
case "$artifacts" in
  "@MAPPINGS")
    prepare_databus_artifacts "instance-types instance-types-transitive mappingbased-literals mappingbased-objects-uncleaned specific-mappingbased-properties geo-coordinates-mappingbased" ;;
  "@GENERIC")
    echo "TODO prepare: @GENERIC" ;;
  "@ALL")
    echo "TODO prepare: @ALL" ;;
  *)
    # Quoted so a space-separated list arrives as ONE argument;
    # prepare_databus_artifacts iterates over the words of its first argument,
    # so unquoted passing would silently drop every artifact after the first.
    prepare_databus_artifacts "$artifacts" ;;
esac
#!/bin/bash
# Stop at the first failing command.
set -o errexit

# Absolute directory of this script.
rootDir="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
# Directory receiving the dated download/extraction log files.
mvnLogs='/data/extraction/logs/mvn'
# Date prefix (YYYY-MM-DD) used in the log file names.
currentDate="$(date '+%Y-%m-%d')"
# Checkout of the extraction framework; commands are run from its dump/ subdirectory.
extraFrameW='/home/extractor/extraction-framework/'
# Directory in which the mvn versions:set / package / deploy steps are run.
dataPlugDir='/data/extraction/databus-maven-plugin/dbpedia/mappings/'
# Runs the framework's "download" command with download.mappings.properties
# from the dump/ directory, writing stdout and stderr to dated log files.
# Globals (read): extraFrameW, mvnLogs, currentDate
# Returns: non-zero if the directory cannot be entered or the command fails.
function download-xml(){
  # Quoted expansions keep paths containing whitespace or glob characters intact.
  cd "$extraFrameW/dump" || return 1
  ../run download download.mappings.properties \
    > "$mvnLogs/$currentDate-mappingbased.download.out" \
    2> "$mvnLogs/$currentDate-mappingbased.download.err"
}
# Runs the framework's "download-ontology" command from the dump/ directory.
# Globals (read): extraFrameW
# Returns: non-zero if the directory cannot be entered or the command fails.
function download-ontology(){
  # Quoted so a framework path containing whitespace still resolves.
  cd "$extraFrameW/dump" || return 1
  ../run download-ontology
}
# Runs the framework's "download-mappings" command from the dump/ directory.
# Globals (read): extraFrameW
# Returns: non-zero if the directory cannot be entered or the command fails.
function download-mappings(){
  # Quoted so a framework path containing whitespace still resolves.
  cd "$extraFrameW/dump" || return 1
  ../run download-mappings
}
# Runs the framework's "extraction" command with extraction.mappings.properties
# from the dump/ directory, writing stdout and stderr to dated log files.
# Globals (read): extraFrameW, mvnLogs, currentDate
# Returns: non-zero if the directory cannot be entered or the command fails.
function extraction(){
  # Quoted expansions keep paths containing whitespace or glob characters intact.
  cd "$extraFrameW/dump" || return 1
  ../run extraction extraction.mappings.properties \
    > "$mvnLogs/$currentDate-mappingbased.extraction.out" \
    2> "$mvnLogs/$currentDate-mappingbased.extraction.err"
}
# Sets the Maven project version to the newest version found one level below
# the entries of $dataPlugDir (i.e. <dataPlugDir>/<artifact>/<YYYY.MM.DD>).
# Globals (read): dataPlugDir
# Returns: non-zero if the directory cannot be entered, no version entry
#          exists, or mvn fails.
function setNewestVersion(){
  cd "$dataPlugDir" || return 1

  # Dots are escaped so that only real YYYY.MM.DD names qualify.
  local -r version_re='^[0-9]{4}\.[0-9]{2}\.[0-9]{2}$'
  local entry name newest=""

  # Glob instead of parsing ls output; an unmatched pattern stays literal and
  # is rejected by the regex.
  for entry in */*; do
    name="${entry##*/}"
    # Fixed-width, zero-padded dates order correctly as plain strings.
    if [[ "$name" =~ $version_re && "$name" > "$newest" ]]; then
      newest="$name"
    fi
  done

  if [[ -z "$newest" ]]; then
    echo "setNewestVersion: no version entry (YYYY.MM.DD) found below $dataPlugDir" >&2
    return 1
  fi

  mvn versions:set -DnewVersion="$newest"
}
# Runs `mvn package` inside $dataPlugDir.
# Globals (read): dataPlugDir
# Returns: non-zero if the directory cannot be entered or mvn fails.
function package(){
  # Quoted so a plugin path containing whitespace still resolves.
  cd "$dataPlugDir" || return 1
  mvn package
}
# Runs `mvn deploy` inside $dataPlugDir.
# Globals (read): dataPlugDir
# Returns: non-zero if the directory cannot be entered or mvn fails.
function deploy(){
  # Quoted so a plugin path containing whitespace still resolves.
  cd "$dataPlugDir" || return 1
  mvn deploy
}
# Runs the whole pipeline in order: downloads, extraction, transfer of the
# results into the databus plugin layout, then version bump, package and
# deploy. The script runs under `set -e`, so the first failing stage stops
# the chain.
# Globals (read): rootDir
function main(){
  download-xml
  download-ontology
  download-mappings
  extraction
  # sep. conf. -- NOTE(review): presumably "separate configuration", i.e. this
  # helper script keeps its own settings; confirm.
  # Quoted so a script directory containing whitespace still resolves.
  "$rootDir/extractionToPlugin.sh"
  setNewestVersion
  package
  deploy
}
main
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment