diff --git a/databus-release.sh b/databus-release.sh
index d0447ad3fa51c50694a2f9dea005d75b7149546a..6cf8260698f538a19c2b0361c4c15ed08bd03cba 100755
--- a/databus-release.sh
+++ b/databus-release.sh
@@ -38,8 +38,7 @@ cd $DATABUSDIR/dbpedia/$GROUP;
 mvn versions:set -DnewVersion=$(ls * | grep '^[0-9]\{4\}.[0-9]\{2\}.[0-9]\{2\}$' | sort -u  | tail -1);
 # get git commit link
-GITSHORTHASH=${git log | head -1 | cut -f2 -d ' ' | grep -o "^......."  }
-GITHUBLINK=${git log | head -1 | cut -f2 -d ' ' | sed 's|^|https://github.com/dbpedia/extraction-framework/commit/|'}
 # TODO marvin: shouldn't this be the web dir directly?
diff --git a/extractionConfiguration/download.text.properties b/extractionConfiguration/download.text.properties
index 44c7d0bd7c6581e92be4aa378385bdf87bb7cb49..27fdec695ae1b2c411296527ad92488c0cb5e957 100644
--- a/extractionConfiguration/download.text.properties
+++ b/extractionConfiguration/download.text.properties
@@ -8,9 +8,7 @@ base-url=http://dumps.wikimedia.your.org/
 # source=pages-articles.xml.bz2
 # languages to download
-TODO testing for english
 # Unzip files while downloading? Not necessary, extraction will unzip on the fly. Let's save space.
@@ -20,4 +18,4 @@ retry-max=5
 #for specific dump dates (e.g. 20170101) if empty: the most recent dump-date is used
diff --git a/extractionConfiguration/extraction.test.properties b/extractionConfiguration/extraction.test.properties
index 64ca339722a1c0d9b06309357c63d70b9e86b928..c6a0af37e1a9adb9bbcf21bc5b30636ee715518c 100644
--- a/extractionConfiguration/extraction.test.properties
+++ b/extractionConfiguration/extraction.test.properties
@@ -16,7 +16,40 @@ languages=ro
 # extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings"
+uri-policy.iri=generic:en; xml-safe-predicates:*
+#parameters specific to the abstract extraction
+# the tag path of the XML tags under which the result is expected
+# the properties used to specify long- and short abstracts (should not change)
+# the short abstract is at least this long
+#parameters specific to the nif extraction
+#only extract abstract (not the whole page)
+#the request query string
+#the xml path of the response
+# will leave out the long and short abstract datasets
+# will write all anchor texts for each nif instance
+# write only the anchor text for link instances
diff --git a/extractionConfiguration/extraction.text.en.properties b/extractionConfiguration/extraction.text.en.properties
index 58994e8de7abe985647d767766dcf5fd8c9625c1..2c2cf907432b50072687ad88bfc4fad41dd13761 100644
--- a/extractionConfiguration/extraction.text.en.properties
+++ b/extractionConfiguration/extraction.text.en.properties
@@ -78,7 +78,7 @@ short-abstract-min-length=200
 #parameters specific to the nif extraction
 #only extract abstract (not the whole page)
 #the request query string
 #the xml path of the response
diff --git a/extractionConfiguration/extraction.text.properties b/extractionConfiguration/extraction.text.properties
index 8e84911d5d3b05a035fd7683820dc75d46e2f4f2..8366fbf132159f263b5bc1684701551019fa34fc 100644
--- a/extractionConfiguration/extraction.text.properties
+++ b/extractionConfiguration/extraction.text.properties
@@ -5,7 +5,7 @@
 #log-dir= see: ../core/src/main/resources/universal.properties
 # WikiPages failed to extract in the first try can be retried with this option (especially interesting when extraction from the mediawiki api)
 # Source file. If source file name ends with .gz or .bz2, it is unzipped on the fly. 
 # Must exist in the directory xxwiki/yyyymmdd and have the prefix xxwiki-yyyymmdd-
@@ -23,13 +23,14 @@ require-download-complete=false
 # List of languages or article count ranges, e.g. 'en,de,fr' or '10000-20000' or '10000-', or '@mappings'
 # NOTE sync with minidumps
 # default namespaces: Main, File, Category, Template
 # we only want abstracts for articles -> only main namespace
 # extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings"
 # if ontology and mapping files are not given or do not exist, download info from mappings.dbpedia.org
diff --git a/extractionConfiguration/universal.properties.template b/extractionConfiguration/universal.properties.template
index 9b4c59373b21b6ab91dbdded33f0a491c871f54f..544adea116b06ca5c78b901da91a5ed7ff29edfd 100644
--- a/extractionConfiguration/universal.properties.template
+++ b/extractionConfiguration/universal.properties.template
@@ -1,7 +1,8 @@
 # NOTE: this properties files is imported in every extraction process and contains general parameters which only have to be set once for every release
 # The DBpedia version to be extracted (in this format: YYYY-MM)
+# SH Note: if left blank, the latest version is used.
 # Replace with your Wikipedia dump download directory (should not change over the course of a release)
 # base-dir=/data/extraction/wikidumps/
@@ -91,3 +92,7 @@ summarize-exceptions=true
 # Options for the SparkExtraction
+## Abstract and NIF extraction 
diff --git a/functions.sh b/functions.sh
index d1333627a2864532edb2d164a51ed9e227b91fac..317b9057ce87088de182f3c5216516cb2faff14d 100755
--- a/functions.sh
+++ b/functions.sh
@@ -35,7 +35,8 @@ extractDumps() {
        >&2 ../run sparkextraction $CONFIGDIR/extraction.generic.en.properties;
     elif ["$GROUP" = "text" ]
-      >&2 ../run extraction $CONFIGDIR/extraction.$GROUP.en.properties;
+      #>&2 ../run extraction $CONFIGDIR/extraction.$GROUP.en.properties;
+      >&2 ../run extraction $CONFIGDIR/extraction.$GROUP.properties;
 	# run for all
 	>&2 ../run extraction $CONFIGDIR/extraction.$GROUP.properties;
@@ -109,19 +110,19 @@ mapNamesToDatabus() {
     case "$1" in
 	# generic
-#        "article-templates-nested") echo "article-templates_nested";;
-#        "citation-data") echo "citations_data";;
-#        "citation-links") echo "citations_links";;
-#        "commons-page-links") echo "commons-sameas-links";;
-#        "page-ids") echo "page_ids";;
-#        "page-length") echo "page_length";;
-#        "page-links") echo "wikilinks";;
-#        "article-categories") echo "categories_articles";;
-#        "category-labels") echo "categories_labels";;
-#        "skos-categories") echo "categories_skos";;
-#        "revision-ids") echo "revision_ids";;
-#        "revision-uris") echo "revision_uris";;
+        "article-templates-nested") echo "article-templates_nested";;
+        "citation-data") echo "citations_data";;
+        "citation-links") echo "citations_links";;
+        "commons-page-links") echo "commons-sameas-links";;
+        "page-ids") echo "page_ids";;
+        "page-length") echo "page_length";;
+        "page-links") echo "wikilinks";;
+        "article-categories") echo "categories_articles";;
+        "category-labels") echo "categories_labels";;
+        "skos-categories") echo "categories_skos";;
+        "revision-ids") echo "revisions_ids";;
+        "revision-uris") echo "revisions_uris";;
        # mappings
 	"mappingbased-objects-disjoint-domain") echo "mappingbased-objects_disjointDomain";;
 	"mappingbased-objects-disjoint-range")  echo "mappingbased-objects_disjointRange";;
@@ -132,8 +133,8 @@ mapNamesToDatabus() {
 	"labels-nmw") echo "labels_nmw";;
 	"mappingbased-properties-reified-qualifiers") echo "mappingbased-properties-reified_qualifiers";;
 	"mappingbased-objects-uncleaned-redirected") echo "mappingbased-objects";;
-	"revision-ids") echo "revision_ids";;
-	"revision-uris") echo "revision_uris";;
+	"revision-ids") echo "revisions_ids";;
+	"revision-uris") echo "revisions_uris";;
 	"wikidata-duplicate-iri-split") echo "debug_duplicateirisplit";;
 	"wikidata-r2r-mapping-errors") echo "debug_r2rmappingerrors";;
 	"wikidata-type-like-statements") echo "debug_typelikestatements";;
@@ -199,8 +200,8 @@ mapAndCopy() {
+diefCommitLink() {
+	echo "https://github.com/dbpedia/extraction-framework/commit/$(git rev-parse @)"
diff --git a/marvin_extraction_run.sh b/marvin_extraction_run.sh
index 8ded91b3dc8f9bfc4ba1ec7464db6ad9a5d0c329..929ce8bad5a984adfc0c8b3521c89f66f5c3a117 100755
--- a/marvin_extraction_run.sh
+++ b/marvin_extraction_run.sh
@@ -1,5 +1,7 @@
+echo "LC_ALL=en_US.UTF-8"
+export LC_ALL=en_US.UTF-8
diff --git a/merge_some_into b/merge_some_into
new file mode 100644
index 0000000000000000000000000000000000000000..b14431f76d3b591254477932a5743307f568d9bb
--- /dev/null
+++ b/merge_some_into
@@ -0,0 +1,231 @@
+#| DBpedia Spotlight - Create database-backed model                                                                             |
+#| @author Joachim Daiber                                                                                                       |
+# $1 Working directory
+# $2 Locale (en_US)
+# $3 Stopwords file
+# $4 Analyzer+Stemmer language prefix e.g. Dutch
+# $5 Model target folder
+export MAVEN_OPTS="-Xmx26G"
+usage ()
+  echo "index_db.sh"
+  echo "usage: ./index_db.sh -o /data/spotlight/nl/opennlp wdir nl_NL /data/spotlight/nl/stopwords.nl.list Dutch /data/spotlight/nl/final_model"
+  echo "Create a database-backed model of DBpedia Spotlight for a specified language."
+  echo " "
+while getopts "eo:b:" opt; do
+  case $opt in
+    o) opennlp="$OPTARG";;
+    e) eval="true";;
+    b) blacklist="$OPTARG";;
+  esac
+shift $((OPTIND - 1))
+if [ $# != 5 ]
+    usage
+    exit
+function get_path {
+  if [[ "$1"  = /* ]]
+  then
+    echo "$1"
+  else
+   echo "$BASE_DIR/$1"
+  fi
+BASE_WDIR=$(get_path $1)
+TARGET_DIR=$(get_path $5)
+STOPWORDS=$(get_path $3)
+if [[ "$opennlp" != "None" ]]; then
+  opennlp=$(get_path $opennlp)
+if [[ "$blacklist" != "false" ]]; then
+  blacklist=$(get_path $blacklist)
+LANGUAGE=`echo $2 | sed "s/_.*//g"`
+echo "Language: $LANGUAGE"
+echo "Working directory: $WDIR"
+mkdir -p $WDIR
+# Preparing the data.
+echo "Loading Wikipedia dump..."
+if [ -z "$WIKI_MIRROR" ]; then
+  WIKI_MIRROR="https://dumps.wikimedia.org/"
+echo Checking for wikipedia dump at $WP_DOWNLOAD_FILE
+if [ -f "$WP_DOWNLOAD_FILE" ]; then
+  echo File exists.
+  echo Downloading wikipedia dump.
+  if [ "$eval" == "false" ]; then
+    curl -# "$WIKI_MIRROR/${LANGUAGE}wiki/latest/${LANGUAGE}wiki-latest-pages-articles.xml.bz2" | bzcat > $WDIR/dump.xml
+  else
+    curl -# "$WIKI_MIRROR/${LANGUAGE}wiki/latest/${LANGUAGE}wiki-latest-pages-articles.xml.bz2" | bzcat | python $BASE_DIR/scripts/split_train_test.py 1200 $WDIR/heldout.txt > $WDIR/dump.xml
+  fi
+cd $WDIR
+cp $STOPWORDS stopwords.$LANGUAGE.list
+if [ -e "$opennlp/$LANGUAGE-token.bin" ]; then
+  cp "$opennlp/$LANGUAGE-token.bin" "$LANGUAGE.tokenizer_model" || echo "tokenizer already exists"
+  touch "$LANGUAGE.tokenizer_model"
+# DBpedia extraction:
+echo "Creating DBpedia nt files..."
+if [ -d extraction-framework ]; then
+    echo "Updating DBpedia Spotlight..."
+    cd extraction-framework
+    git reset --hard HEAD
+    git pull
+    mvn install
+    echo "Setting up DEF..."
+    git clone https://github.com/dbpedia/extraction-framework.git
+    cd extraction-framework
+    mvn install
+cd dump
+dumpdate=$(date +%Y%m%d)
+mkdir -p $dumpdir
+ln -s $WDIR/dump.xml $dumpdir/${LANGUAGE}wiki-${dumpdate}-dump.xml
+cat << EOF > dbpedia.properties
+uri-policy.uri=uri:en; generic:en; xml-safe-predicates:*
+if [[ ",ga,ar,be,bg,bn,ced,cs,cy,da,eo,et,fa,fi,gl,hi,hr,hu,id,ja,lt,lv,mk,mt,sk,sl,sr,tr,ur,vi,war,zh," == *",$LANGUAGE,"* ]]; then #Languages with no disambiguation definitions
+     echo "extractors=.RedirectExtractor,.MappingExtractor" >> dbpedia.properties
+     echo "extractors=.RedirectExtractor,.DisambiguationExtractor,.MappingExtractor" >> dbpedia.properties
+../run extraction dbpedia.properties
+zcat $dumpdir/${LANGUAGE}wiki-${dumpdate}-instance-types*.nt.gz > $WDIR/instance_types.nt
+zcat $dumpdir/${LANGUAGE}wiki-${dumpdate}-disambiguations-unredirected.nt.gz > $WDIR/disambiguations.nt
+zcat $dumpdir/${LANGUAGE}wiki-${dumpdate}-redirects.nt.gz > $WDIR/redirects.nt
+rm -Rf $dumpdir
+# Setting up Spotlight:
+if [ -d dbpedia-spotlight ]; then
+    echo "Updating DBpedia Spotlight..."
+    cd dbpedia-spotlight
+    git reset --hard HEAD
+    git pull
+    mvn -T 1C -q clean install
+    echo "Setting up DBpedia Spotlight..."
+    git clone --depth 1 https://github.com/dbpedia-spotlight/dbpedia-spotlight-model
+    mv dbpedia-spotlight-model dbpedia-spotlight
+    cd dbpedia-spotlight
+    mvn -T 1C -q clean install
+# Extracting wiki stats:
+rm -Rf wikistatsextractor
+git clone --depth 1 https://github.com/dbpedia-spotlight/wikistatsextractor
+# Stop processing if one step fails
+set -e
+#Copy results to local:
+cd $BASE_WDIR/wikistatsextractor
+mvn install exec:java -Dexec.args="--output_folder $WDIR $LANGUAGE $2 $4Stemmer $WDIR/dump.xml $WDIR/stopwords.$LANGUAGE.list"
+if [ "$blacklist" != "false" ]; then
+  echo "Removing blacklist URLs..."
+  mv $WDIR/uriCounts $WDIR/uriCounts_all
+  grep -v -f $blacklist $WDIR/uriCounts_all > $WDIR/uriCounts
+echo "Finished wikistats extraction. Cleaning up..."
+rm -f $WDIR/dump.xml
+# Building Spotlight model:
+#Create the model:
+cd $BASE_WDIR/dbpedia-spotlight
+mvn -pl index exec:java -Dexec.mainClass=org.dbpedia.spotlight.db.CreateSpotlightModel -Dexec.args="$2 $WDIR $TARGET_DIR $opennlp $STOPWORDS $4Stemmer"
+if [ "$eval" == "true" ]; then
+  mvn -pl eval exec:java -Dexec.mainClass=org.dbpedia.spotlight.evaluation.EvaluateSpotlightModel -Dexec.args="$TARGET_DIR $WDIR/heldout.txt" > $TARGET_DIR/evaluation.txt
+curl https://raw.githubusercontent.com/dbpedia-spotlight/model-quickstarter/master/model_readme.txt > $TARGET_DIR/README.txt
+curl "$WIKI_MIRROR/${LANGUAGE}wiki/latest/${LANGUAGE}wiki-latest-pages-articles.xml.bz2-rss.xml" | grep link | sed -e 's/^.*<link>//' -e 's/<[/]link>.*$//' | uniq >> $TARGET_DIR/README.txt
+echo "Collecting data..."
+mkdir -p data/$LANGUAGE && mv $WDIR/*Counts data/$LANGUAGE
+gzip $WDIR/*.nt &
+set +e