Commit 76923bbe authored by Sebastian Hellmann's avatar Sebastian Hellmann
Browse files

moved variables to function

parent 357da285
......@@ -13,10 +13,11 @@ We accept pull requests to improve the config files, all contributions will be m
```
git clone https://git.informatik.uni-leipzig.de/dbpedia-assoc/marvin-config
cd marvin-config
# delete previous versions of the DIEF
# (optional) delete previous versions of the DIEF
rm -rf marvin-extraction/extraction-framework
# install dief in marvin-extraction/extraction-framework
./setup-dief.sh
# test Romanian extraction, very small
# test run Romanian extraction, very small
./marvin_extraction_run.sh --group=test
```
......@@ -32,7 +33,11 @@ To run the other extractions, use either
## Cronjobs
Below is a list of cronjobs we use on the different servers
Below is a list of cronjobs we use on the different servers:
```
TODO
```
## Acknowledgements
......@@ -44,7 +49,9 @@ We thank Sören Auer and the Technische Informationsbibliothek (TIB) for providi
This contribution by TIB to DBpedia & its community is a great push towards incentivizing Open Data and establishing a global and national research and innovation data infrastructure.
# Workflow
# Workflow Description
##
## Downloading the wikimedia dumps
TODO
......
......@@ -5,11 +5,13 @@ dbpedia-version=2018-10
# Replace with your Wikipedia dump download directory (should not change over the course of a release)
# base-dir=/data/extraction/wikidumps/
base-dir=$BASEDIR
# AUTOMATICALLY SET BY setup-dief.sh
# base-dir=$BASEDIR
# The log file directory - used to store all log files created in the course of all extractions
# log-dir=/data/extraction/logs/extraction/
log-dir=$LOGDIR/extraction/
# AUTOMATICALLY SET BY setup-dief.sh
# log-dir=$LOGDIR/extraction/
# to forward extraction summaries and warnings via the slack API, use this option
-slack-webhook=https://hooks.slack.com/services/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
......
#!/bin/bash
##############
# setup paths
##############
# Path variables shared by every script that sources this file.
# ROOT is the directory containing this file; all other paths derive from it.
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
CONFIGDIR="$ROOT/extractionConfiguration"
# Not created here: only the log, wikidumps and release directories are made below.
DIEFDIR="$ROOT/marvin-extraction/extraction-framework"
# One log directory per calendar day (YYYY-MM-DD).
LOGDIR="$ROOT/marvin-extraction/logs/$(date +%Y-%m-%d)"
EXTRACTIONBASEDIR="$ROOT/marvin-extraction/wikidumps"
# TODO
RELEASEDIR="$ROOT/marvin-extraction/release"
DATAPUSMAVENPLUGINPOMDIR="$ROOT/databus-maven-plugin"
DATAPUSMAVENPLUGINPOMGIT="https://github.com/dbpedia/databus-maven-plugin.git"

# Create the working directories only after every variable is assigned.
# Running mkdir on LOGDIR before its assignment would pass an empty operand,
# fail, and leave the log directory missing. Quoting keeps paths with spaces intact.
mkdir -p -- "$LOGDIR" "$EXTRACTIONBASEDIR" "$RELEASEDIR"
##############
# functions
##############
# download and extract data
extractDumps() {
cd $DIEFDIR/dump;
......
#!/bin/bash
# Usage/help text for this script.
# NOTE(review): the two "--group=..." usage lines inside the string look like
# the before/after versions of a single line from a diff view — confirm which
# one is current before relying on the text.
HELP="usage:
--group={test|generic|mappings|wikidata} [--databus-deploy|--skip-dief-install]
--group={test|generic|mappings|wikidata} [--databus-deploy]
description:
--group={test|generic|mappings|wikidata} : required
selects download.\$GROUP.properties and extraction.\$GROUP.properties from extractionConfig dir
Some exceptions are hard coded like 'extraction.generic.en.properties'
[--skip-dief-install] : optional
'false' -> each run does a fresh checkout install of the DIEF (DBpedia Information Extraction Framework)
'true' -> skipped
"
#######################
# include all functions and path variables
# NOTE(review): functions.sh is given as a bare file name, so this presumably
# expects the script to be started from the directory holding functions.sh —
# confirm.
#######################
source functions.sh
##############
# setup paths
# NOTE(review): these assignments run after functions.sh has been sourced and
# use the same variable names as its "setup paths" section; they look like the
# pre-change lines of a diff view — confirm whether they should remain.
##############
# ROOT = directory of this script + "/marvin-extraction".
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/marvin-extraction"
CONFIGDIR="$ROOT/extractionConfiguration"
# set and create the per-day log directory (named YYYY-MM-DD)
LOGDIR="$ROOT/logs/$(date +%Y-%m-%d)" && mkdir -p $LOGDIR
DIEFDIR="$ROOT/extraction-framework"
# TODO
EXTRACTIONBASEDIR="$ROOT/wikidumps"
DATAPUSMAVENPLUGINPOMDIR="$ROOT/databus-maven-plugin"
RELEASEDIR="$ROOT/release"
DATAPUSMAVENPLUGINPOMGIT="https://github.com/dbpedia/databus-maven-plugin.git"
# Make sure the wikidumps and release directories exist.
mkdir -p $EXTRACTIONBASEDIR
mkdir -p $RELEASEDIR
#################
#check arguments
......@@ -76,23 +57,20 @@ then
fi
#######################
# include all functions
#######################
source functions.sh
#######################
# RUN (requires setup-dief.sh)
# Each step changes into a module directory below $DIEFDIR and invokes the
# "../run" launcher found there, writing stdout and stderr (&>) to a log file
# in $LOGDIR.
# NOTE(review): several commands appear twice, once with and once without a
# trailing ';' — this looks like before/after lines of a diff view; as written
# each download would run twice. Confirm only one copy should remain.
#######################
# DOWNLOAD ONTOLOGY and MAPPINGS
cd $DIEFDIR/core;
../run download-ontology &> $LOGDIR/downloadOntology.log;
../run download-mappings &> $LOGDIR/downloadMappings.log;
cd $DIEFDIR/core
../run download-ontology &> $LOGDIR/downloadOntology.log
../run download-mappings &> $LOGDIR/downloadMappings.log
# DOWNLOAD WIKIDUMPS
# The download configuration is chosen by $GROUP: download.$GROUP.properties in $CONFIGDIR.
cd $DIEFDIR/dump
../run download $CONFIGDIR/download.$GROUP.properties &> $LOGDIR/downloadWikidumps.log;
../run download $CONFIGDIR/download.$GROUP.properties &> $LOGDIR/downloadWikidumps.log
# EXTRACT
# (currently disabled: the extractDumps call below is commented out)
#extractDumps &> $LOGDIR/extraction.log;
......
#!/bin/bash
# Clones the DBpedia extraction framework (DIEF) into $DIEFDIR, generates its
# universal.properties and builds it with Maven.
# NOTE(review): the three assignments below precede `source functions.sh` and
# look like the pre-change lines of a diff view — confirm whether they should
# remain.
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/marvin-extraction"
CONFIGDIR="$ROOT/extractionConfiguration"
DIEFDIR="$ROOT/extraction-framework"
# get all variables and functions
# ($LOGDIR and $EXTRACTIONBASEDIR used further down are not assigned in this
# script; presumably they come from functions.sh — confirm)
source functions.sh
# Relative to the current working directory, not to this script's location.
cd marvin-extraction
git clone "https://github.com/dbpedia/extraction-framework.git" $DIEFDIR
cd $DIEFDIR
# todo add config
#cd $ROOT && cp $ROOT/config.d/universal.properties.template $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties;
#sed -i -e 's,$BASEDIR,'$EXTRACTIONBASEDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties;
#sed -i -e 's,$LOGDIR,'$LOGDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties;
# concat universal props
# The first echo uses '>' and so starts universal.properties afresh with
# base-dir; log-dir and the template contents are then appended with '>>'.
echo "base-dir=$EXTRACTIONBASEDIR" > $DIEFDIR/core/src/main/resources/universal.properties
echo "log-dir=$LOGDIR/extraction/" >> $DIEFDIR/core/src/main/resources/universal.properties
cat $CONFIGDIR/universal.properties.template >> $DIEFDIR/core/src/main/resources/universal.properties
# Build the framework from $DIEFDIR (the current directory at this point).
# NOTE(review): the build is listed twice — once printing to the terminal and
# once with stdout/stderr redirected to $LOGDIR/installDIEF.log; this looks
# like before/after lines of a diff view. Confirm only one should remain.
# Nothing in this script creates $LOGDIR, so the redirect relies on it
# already existing.
mvn clean install
mvn clean install &> $LOGDIR/installDIEF.log
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment