diff --git a/databus-release.sh b/databus-release.sh
index d0447ad3fa51c50694a2f9dea005d75b7149546a..6cf8260698f538a19c2b0361c4c15ed08bd03cba 100755
--- a/databus-release.sh
+++ b/databus-release.sh
@@ -38,8 +38,7 @@ cd $DATABUSDIR/dbpedia/$GROUP;
 mvn versions:set -DnewVersion=$(ls * | grep '^[0-9]\{4\}.[0-9]\{2\}.[0-9]\{2\}$' | sort -u  | tail -1);
 
 # get git commit link
-GITSHORTHASH=${git log | head -1 | cut -f2 -d ' ' | grep -o "^......."  }
-GITHUBLINK=${git log | head -1 | cut -f2 -d ' ' | sed 's|^|https://github.com/dbpedia/extraction-framework/commit/|'}
+GITHUBLINK="$(diefCommitLink)"
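+# diefCommitLink (added in functions.sh in this change) resolves the local DIEF checkout to a GitHub commit permalink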
 
 PUBLISHER="https://vehnem.github.io/webid.ttl#this";
 # TODO marvin: shouldn't this be the web dir directly?
diff --git a/extractionConfiguration/download.text.properties b/extractionConfiguration/download.text.properties
index 44c7d0bd7c6581e92be4aa378385bdf87bb7cb49..27fdec695ae1b2c411296527ad92488c0cb5e957 100644
--- a/extractionConfiguration/download.text.properties
+++ b/extractionConfiguration/download.text.properties
@@ -8,9 +8,7 @@ base-url=http://dumps.wikimedia.your.org/
 # source=pages-articles.xml.bz2
 
 # languages to download
-TODO testing for english
-languages=en
-#,af,als,am,an,ar,arz,ast,azb,az,ba,bar,bat-smg,be,bg,bn,bpy,br,bs,bug,ca,cdo,ceb,ce,ckb,cs,cv,cy,da,de,el,eml,eo,es,et,eu,fa,fi,fo,fr,fy,ga,gd,gl,gu,he,hi,hr,hsb,ht,hu,hy,ia,id,ilo,io,is,it,ja,jv,ka,kk,kn,ko,ku,ky,la,lb,li,lmo,lt,lv,mai,mg,mhr,min,mk,ml,mn,mrj,mr,ms,my,mzn,nap,nds,ne,new,nl,nn,no,oc,or,os,pa,pl,pms,pnb,pt,qu,ro,ru,sah,sa,scn,sco,sd,sh,si,simple,sk,sl,sq,sr,su,sv,sw,ta,te,tg,th,tl,tr,tt,uk,ur,uz,vec,vi,vo,wa,war,wuu,xmf,yi,yo,zh,zh-min-nan,zh-yue
+languages=en,af,als,am,an,ar,arz,ast,azb,az,ba,bar,bat-smg,be,bg,bn,bpy,br,bs,bug,ca,cdo,ceb,ce,ckb,cs,cv,cy,da,de,el,eml,eo,es,et,eu,fa,fi,fo,fr,fy,ga,gd,gl,gu,he,hi,hr,hsb,ht,hu,hy,ia,id,ilo,io,is,it,ja,jv,ka,kk,kn,ko,ku,ky,la,lb,li,lmo,lt,lv,mai,mg,mhr,min,mk,ml,mn,mrj,mr,ms,my,mzn,nap,nds,ne,new,nl,nn,no,oc,or,os,pa,pl,pms,pnb,pt,qu,ro,ru,sah,sa,scn,sco,sd,sh,si,simple,sk,sl,sq,sr,su,sv,sw,ta,te,tg,th,tl,tr,tt,uk,ur,uz,vec,vi,vo,wa,war,wuu,xmf,yi,yo,zh,zh-min-nan,zh-yue
 
 # Unzip files while downloading? Not necessary, extraction will unzip on the fly. Let's save space.
 unzip=false
@@ -20,4 +18,4 @@ retry-max=5
 retry-millis=10000
 
 #for specific dump dates (e.g. 20170101) if empty: the most recent dump-date is used
-dump-date=
+dump-date=20200201
diff --git a/extractionConfiguration/extraction.test.properties b/extractionConfiguration/extraction.test.properties
index 64ca339722a1c0d9b06309357c63d70b9e86b928..c6a0af37e1a9adb9bbcf21bc5b30636ee715518c 100644
--- a/extractionConfiguration/extraction.test.properties
+++ b/extractionConfiguration/extraction.test.properties
@@ -16,7 +16,40 @@ languages=ro
 
 # extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings"
 
-extractors=.MappingExtractor,.RedirectExtractor
+#extractors=.NifExtractor
+extractors=.MappingExtractor
+
+uri-policy.iri=generic:en; xml-safe-predicates:*
+format.ttl.bz2=turtle-triples;uri-policy.iri
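+# MediaWiki API client (mwc) settings: endpoint, retries and connect/read timeouts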
+mwc-apiUrl=https://{{LANG}}.wikipedia.org/w/api.php
+mwc-maxRetries=5
+mwc-connectMs=4000
+mwc-readMs=30000
+mwc-sleepFactor=2000
+#parameters specific for the abstract extraction
+abstract-query=&format=xml&action=query&prop=extracts&exintro=&explaintext=&titles=%s
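+# illustrative example (not part of the original config): %s is filled with the page title, so for
+# language "en" and title "Berlin" the query string ends in titles=Berlin and is appended to mwc-apiUrl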
+# the tag path of the XML tags under which the result is expected
+abstract-tags=api,query,pages,page,extract
+# the properties used to specify long- and short abstracts (should not change)
+short-abstracts-property=rdfs:comment
+long-abstracts-property=abstract
+# the short abstract is at least this long
+short-abstract-min-length=200
+
+#parameters specific to the nif extraction
+
+#only extract abstract (not the whole page)
+nif-extract-abstract-only=true
+#the request query string
+nif-query=&format=xml&action=parse&prop=text&page=%s&pageid=%d
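+# illustrative example (not part of the original config): %s/%d are filled with the page title and page id,
+# e.g. &format=xml&action=parse&prop=text&page=Berlin&pageid=<id>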
+#the xml path of the response
+nif-tags=api,parse,text
+# will leave out the long and short abstract datasets
+nif-isTestRun=false
+# will write all anchor texts for each nif instance
+nif-write-anchor=true
+# write only the anchor text for link instances
+nif-write-link-anchor=true
 
 #extractors.ar=.MappingExtractor,.TopicalConceptsExtractor
 #
diff --git a/extractionConfiguration/extraction.text.en.properties b/extractionConfiguration/extraction.text.en.properties
index 58994e8de7abe985647d767766dcf5fd8c9625c1..2c2cf907432b50072687ad88bfc4fad41dd13761 100644
--- a/extractionConfiguration/extraction.text.en.properties
+++ b/extractionConfiguration/extraction.text.en.properties
@@ -78,7 +78,7 @@ short-abstract-min-length=200
 #parameters specific to the nif extraction
 
 #only extract abstract (not the whole page)
-nif-extract-abstract-only=false
+nif-extract-abstract-only=true
 #the request query string
 nif-query=&format=xml&action=parse&prop=text&page=%s&pageid=%d
 #the xml path of the response
diff --git a/extractionConfiguration/extraction.text.properties b/extractionConfiguration/extraction.text.properties
index 8e84911d5d3b05a035fd7683820dc75d46e2f4f2..8366fbf132159f263b5bc1684701551019fa34fc 100644
--- a/extractionConfiguration/extraction.text.properties
+++ b/extractionConfiguration/extraction.text.properties
@@ -5,7 +5,7 @@
 #log-dir= see: ../core/src/main/resources/universal.properties
 
 # WikiPages failed to extract in the first try can be retried with this option (especially interesting when extraction from the mediawiki api)
-retry-failed-pages=false
+#retry-failed-pages=true
 
 # Source file. If source file name ends with .gz or .bz2, it is unzipped on the fly. 
 # Must exist in the directory xxwiki/yyyymmdd and have the prefix xxwiki-yyyymmdd-
@@ -23,13 +23,14 @@ require-download-complete=false
 
 # List of languages or article count ranges, e.g. 'en,de,fr' or '10000-20000' or '10000-', or '@mappings'
 # NOTE sync with minidumps
-languages=af,als,am,an,arz,ast,azb,ba,bar,bat-smg,bpy,br,bs,bug,cdo,ce,ceb,ckb,cv,fo,fy,gd,he,hsb,ht,ia,ilo,io,is,jv,ka,kn,ku,ky,la,lb,li,lmo,mai,mg,min,ml,mn,mr,mrj,ms,mt,my,mzn,nah,nap,nds,ne,new,nn,no,oc,or,os,pa,pms,pnb,qu,sa,sah,scn,sco,sh,si,simple,sq,su,sw,ta,te,tg,th,tl,tt,uz,vec,wa,xmf,yo,zh-min-nan,zh-yue
+languages=en,af,als,am,an,ar,arz,ast,azb,az,ba,bar,bat-smg,be,bg,bn,bpy,br,bs,bug,ca,cdo,ceb,ce,ckb,cs,cv,cy,da,de,el,eml,eo,es,et,eu,fa,fi,fo,fr,fy,ga,gd,gl,gu,he,hi,hr,hsb,ht,hu,hy,ia,id,ilo,io,is,it,ja,jv,ka,kk,kn,ko,ku,ky,la,lb,li,lmo,lt,lv,mai,mg,mhr,min,mk,ml,mn,mrj,mr,ms,my,mzn,nap,nds,ne,new,nl,nn,no,oc,or,os,pa,pl,pms,pnb,pt,qu,ro,ru,sah,sa,scn,sco,sd,sh,si,simple,sk,sl,sq,sr,su,sv,sw,ta,te,tg,th,tl,tr,tt,uk,ur,uz,vec,vi,vo,wa,war,wuu,xmf,yi,yo,zh,zh-min-nan,zh-yue
 # default namespaces: Main, File, Category, Template
 # we only want abstracts for articles -> only main namespace
-namespaces=Main
+#namespaces=Main
 
 # extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings"
-
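+# run extraction jobs for several languages in parallel (8 at a time)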
+parallel-processes=8
+run-jobs-in-parallel=true
 extractors=.NifExtractor
 
 # if ontology and mapping files are not given or do not exist, download info from mappings.dbpedia.org
diff --git a/extractionConfiguration/universal.properties.template b/extractionConfiguration/universal.properties.template
index 9b4c59373b21b6ab91dbdded33f0a491c871f54f..544adea116b06ca5c78b901da91a5ed7ff29edfd 100644
--- a/extractionConfiguration/universal.properties.template
+++ b/extractionConfiguration/universal.properties.template
@@ -1,7 +1,8 @@
 # NOTE: this properties files is imported in every extraction process and contains general parameters which only have to be set once for every release
 
 # The DBpedia version to be extracted (in this format: YYYY-MM)
-dbpedia-version=2018-10
+# SH Note: leaving it blank uses the latest version.
+dbpedia-version=
 
 # Replace with your Wikipedia dump download directory (should not change over the course of a release)
 # base-dir=/data/extraction/wikidumps/
@@ -91,3 +92,7 @@ summarize-exceptions=true
 # Options for the SparkExtraction
 spark-master=local[32]
 spark-local-dir=/data/extraction/spark.local.dir/
+
+###########################
+## Abstract and NIF extraction 
+
diff --git a/functions.sh b/functions.sh
index d1333627a2864532edb2d164a51ed9e227b91fac..317b9057ce87088de182f3c5216516cb2faff14d 100755
--- a/functions.sh
+++ b/functions.sh
@@ -35,7 +35,8 @@ extractDumps() {
        >&2 ../run sparkextraction $CONFIGDIR/extraction.generic.en.properties;
     elif ["$GROUP" = "text" ]
     then
-      >&2 ../run extraction $CONFIGDIR/extraction.$GROUP.en.properties;
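+      # en is now included in extraction.text.properties, so the text group runs the combined config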
+      #>&2 ../run extraction $CONFIGDIR/extraction.$GROUP.en.properties;
+      >&2 ../run extraction $CONFIGDIR/extraction.$GROUP.properties;
     else
 	# run for all
 	>&2 ../run extraction $CONFIGDIR/extraction.$GROUP.properties;
@@ -109,19 +110,19 @@ mapNamesToDatabus() {
     case "$1" in
 
 	# generic
-#        "article-templates-nested") echo "article-templates_nested";;
-#        "citation-data") echo "citations_data";;
-#        "citation-links") echo "citations_links";;
-#        "commons-page-links") echo "commons-sameas-links";;
-#        "page-ids") echo "page_ids";;
-#        "page-length") echo "page_length";;
-#        "page-links") echo "wikilinks";;
-#        "article-categories") echo "categories_articles";;
-#        "category-labels") echo "categories_labels";;
-#        "skos-categories") echo "categories_skos";;
-#        "revision-ids") echo "revision_ids";;
-#        "revision-uris") echo "revision_uris";;
-#
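+        # DIEF dataset name (left) is mapped to its Databus artifact/variant name (right)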
+        "article-templates-nested") echo "article-templates_nested";;
+        "citation-data") echo "citations_data";;
+        "citation-links") echo "citations_links";;
+        "commons-page-links") echo "commons-sameas-links";;
+        "page-ids") echo "page_ids";;
+        "page-length") echo "page_length";;
+        "page-links") echo "wikilinks";;
+        "article-categories") echo "categories_articles";;
+        "category-labels") echo "categories_labels";;
+        "skos-categories") echo "categories_skos";;
+        "revision-ids") echo "revisions_ids";;
+        "revision-uris") echo "revisions_uris";;
+
        # mappings
 	"mappingbased-objects-disjoint-domain") echo "mappingbased-objects_disjointDomain";;
 	"mappingbased-objects-disjoint-range")  echo "mappingbased-objects_disjointRange";;
@@ -132,8 +133,8 @@ mapNamesToDatabus() {
 	"labels-nmw") echo "labels_nmw";;
 	"mappingbased-properties-reified-qualifiers") echo "mappingbased-properties-reified_qualifiers";;
 	"mappingbased-objects-uncleaned-redirected") echo "mappingbased-objects";;
-	"revision-ids") echo "revision_ids";;
-	"revision-uris") echo "revision_uris";;
+	"revision-ids") echo "revisions_ids";;
+	"revision-uris") echo "revisions_uris";;
 	"wikidata-duplicate-iri-split") echo "debug_duplicateirisplit";;
 	"wikidata-r2r-mapping-errors") echo "debug_r2rmappingerrors";;
 	"wikidata-type-like-statements") echo "debug_typelikestatements";;
@@ -199,8 +200,8 @@ mapAndCopy() {
 
 }
 
+diefCommitLink() {
 
-
-
-
-
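+	# $DIEFDIR is expected to point at the local extraction-framework (DIEF) checkout;
+	# print a GitHub permalink for the commit currently checked out there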
+	cd "$DIEFDIR"
+	echo "https://github.com/dbpedia/extraction-framework/commit/$(git rev-parse @)"
+}
diff --git a/marvin_extraction_run.sh b/marvin_extraction_run.sh
index 8ded91b3dc8f9bfc4ba1ec7464db6ad9a5d0c329..929ce8bad5a984adfc0c8b3521c89f66f5c3a117 100755
--- a/marvin_extraction_run.sh
+++ b/marvin_extraction_run.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
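+# force a consistent UTF-8 locale for all extraction tooling started from this script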
 
+echo "LC_ALL=en_US.UTF-8"
+export LC_ALL=en_US.UTF-8
 
 
 #######################
diff --git a/merge_some_into b/merge_some_into
new file mode 100644
index 0000000000000000000000000000000000000000..b14431f76d3b591254477932a5743307f568d9bb
--- /dev/null
+++ b/merge_some_into
@@ -0,0 +1,231 @@
+#!/bin/bash
+#+------------------------------------------------------------------------------------------------------------------------------+
+#| DBpedia Spotlight - Create database-backed model                                                                             |
+#| @author Joachim Daiber                                                                                                       |
+#+------------------------------------------------------------------------------------------------------------------------------+
+
+# $1 Working directory
+# $2 Locale (en_US)
+# $3 Stopwords file
+# $4 Analyzer+Stemmer language prefix e.g. Dutch
+# $5 Model target folder
+
+export MAVEN_OPTS="-Xmx26G"
+
+usage ()
+{
+  echo "index_db.sh"
+  echo "usage: ./index_db.sh -o /data/spotlight/nl/opennlp wdir nl_NL /data/spotlight/nl/stopwords.nl.list Dutch /data/spotlight/nl/final_model"
+  echo "Create a database-backed model of DBpedia Spotlight for a specified language."
+  echo " "
+}
+
+
+opennlp="None"
+eval="false"
+blacklist="false"
+
+while getopts "eo:b:" opt; do
+  case $opt in
+    o) opennlp="$OPTARG";;
+    e) eval="true";;
+    b) blacklist="$OPTARG";;
+  esac
+done
+
+
+shift $((OPTIND - 1))
+
+if [ $# != 5 ]
+then
+    usage
+    exit
+fi
+
+BASE_DIR=$(pwd)
+
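+# expand a possibly relative path to an absolute one, based on the directory the script was started from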
+function get_path {
+  if [[ "$1"  = /* ]]
+  then
+    echo "$1"
+  else
+   echo "$BASE_DIR/$1"
+  fi
+}
+
+BASE_WDIR=$(get_path $1)
+TARGET_DIR=$(get_path $5)
+STOPWORDS=$(get_path $3)
+WDIR="$BASE_WDIR/$2"
+
+if [[ "$opennlp" != "None" ]]; then
+  opennlp=$(get_path $opennlp)
+fi
+if [[ "$blacklist" != "false" ]]; then
+  blacklist=$(get_path $blacklist)
+fi
+
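+# keep only the language part of the locale argument, e.g. "nl_NL" -> "nl"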
+LANGUAGE=`echo $2 | sed "s/_.*//g"`
+
+echo "Language: $LANGUAGE"
+echo "Working directory: $WDIR"
+
+mkdir -p $WDIR
+
+########################################################################################################
+# Preparing the data.
+########################################################################################################
+
+echo "Loading Wikipedia dump..."
+if [ -z "$WIKI_MIRROR" ]; then
+  WIKI_MIRROR="https://dumps.wikimedia.org/"
+fi
+
+WP_DOWNLOAD_FILE=$WDIR/dump.xml
+echo Checking for wikipedia dump at $WP_DOWNLOAD_FILE
+if [ -f "$WP_DOWNLOAD_FILE" ]; then
+  echo File exists.
+else
+  echo Downloading wikipedia dump.
+  if [ "$eval" == "false" ]; then
+    curl -# "$WIKI_MIRROR/${LANGUAGE}wiki/latest/${LANGUAGE}wiki-latest-pages-articles.xml.bz2" | bzcat > $WDIR/dump.xml
+  else
+    curl -# "$WIKI_MIRROR/${LANGUAGE}wiki/latest/${LANGUAGE}wiki-latest-pages-articles.xml.bz2" | bzcat | python $BASE_DIR/scripts/split_train_test.py 1200 $WDIR/heldout.txt > $WDIR/dump.xml
+  fi
+fi
+
+cd $WDIR
+cp $STOPWORDS stopwords.$LANGUAGE.list
+
+if [ -e "$opennlp/$LANGUAGE-token.bin" ]; then
+  cp "$opennlp/$LANGUAGE-token.bin" "$LANGUAGE.tokenizer_model" || echo "tokenizer already exists"
+else
+  touch "$LANGUAGE.tokenizer_model"
+fi
+
+
+########################################################################################################
+# DBpedia extraction:
+########################################################################################################
+
+#Download:
+echo "Creating DBpedia nt files..."
+cd $BASE_WDIR
+
+if [ -d extraction-framework ]; then
+    echo "Updating DBpedia Spotlight..."
+    cd extraction-framework
+    git reset --hard HEAD
+    git pull
+    mvn install
+else
+    echo "Setting up DEF..."
+    git clone git://github.com/dbpedia/extraction-framework.git
+    cd extraction-framework
+    mvn install
+fi
+
+cd dump
+
+dumpdate=$(date +%Y%m%d)
+dumpdir=$WDIR/${LANGUAGE}wiki/${dumpdate}
+
+mkdir -p $dumpdir
+ln -s $WDIR/dump.xml $dumpdir/${LANGUAGE}wiki-${dumpdate}-dump.xml
+
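+# write a minimal DIEF extraction config for this language and dump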
+cat << EOF > dbpedia.properties
+base-dir=$WDIR
+wiki=$LANGUAGE
+locale=$LANGUAGE
+source=dump.xml
+require-download-complete=false
+languages=$LANGUAGE
+ontology=../ontology.xml
+mappings=../mappings
+uri-policy.uri=uri:en; generic:en; xml-safe-predicates:*
+format.nt.gz=n-triples;uri-policy.uri
+EOF
+
+if [[ ",ga,ar,be,bg,bn,ced,cs,cy,da,eo,et,fa,fi,gl,hi,hr,hu,id,ja,lt,lv,mk,mt,sk,sl,sr,tr,ur,vi,war,zh," == *",$LANGUAGE,"* ]]; then #Languages with no disambiguation definitions
+     echo "extractors=.RedirectExtractor,.MappingExtractor" >> dbpedia.properties
+else
+     echo "extractors=.RedirectExtractor,.DisambiguationExtractor,.MappingExtractor" >> dbpedia.properties
+fi
+
+../run extraction dbpedia.properties
+
+zcat $dumpdir/${LANGUAGE}wiki-${dumpdate}-instance-types*.nt.gz > $WDIR/instance_types.nt
+zcat $dumpdir/${LANGUAGE}wiki-${dumpdate}-disambiguations-unredirected.nt.gz > $WDIR/disambiguations.nt
+zcat $dumpdir/${LANGUAGE}wiki-${dumpdate}-redirects.nt.gz > $WDIR/redirects.nt
+
+rm -Rf $dumpdir
+
+########################################################################################################
+# Setting up Spotlight:
+########################################################################################################
+
+cd $BASE_WDIR
+
+if [ -d dbpedia-spotlight ]; then
+    echo "Updating DBpedia Spotlight..."
+    cd dbpedia-spotlight
+    git reset --hard HEAD
+    git pull
+    mvn -T 1C -q clean install
+else
+    echo "Setting up DBpedia Spotlight..."
+    git clone --depth 1 https://github.com/dbpedia-spotlight/dbpedia-spotlight-model
+    mv dbpedia-spotlight-model dbpedia-spotlight
+    cd dbpedia-spotlight
+    mvn -T 1C -q clean install
+fi
+
+
+########################################################################################################
+# Extracting wiki stats:
+########################################################################################################
+
+cd $BASE_WDIR
+rm -Rf wikistatsextractor
+git clone --depth 1 https://github.com/dbpedia-spotlight/wikistatsextractor
+
+# Stop processing if one step fails
+set -e
+
+#Run the wikistats extraction (writes the *Counts files into $WDIR):
+cd $BASE_WDIR/wikistatsextractor
+mvn install exec:java -Dexec.args="--output_folder $WDIR $LANGUAGE $2 $4Stemmer $WDIR/dump.xml $WDIR/stopwords.$LANGUAGE.list"
+
+if [ "$blacklist" != "false" ]; then
+  echo "Removing blacklist URLs..."
+  mv $WDIR/uriCounts $WDIR/uriCounts_all
+  grep -v -f $blacklist $WDIR/uriCounts_all > $WDIR/uriCounts
+fi
+
+echo "Finished wikistats extraction. Cleaning up..."
+rm -f $WDIR/dump.xml
+
+
+########################################################################################################
+# Building Spotlight model:
+########################################################################################################
+
+#Create the model:
+cd $BASE_WDIR/dbpedia-spotlight
+
+mvn -pl index exec:java -Dexec.mainClass=org.dbpedia.spotlight.db.CreateSpotlightModel -Dexec.args="$2 $WDIR $TARGET_DIR $opennlp $STOPWORDS $4Stemmer"
+
+if [ "$eval" == "true" ]; then
+  mvn -pl eval exec:java -Dexec.mainClass=org.dbpedia.spotlight.evaluation.EvaluateSpotlightModel -Dexec.args="$TARGET_DIR $WDIR/heldout.txt" > $TARGET_DIR/evaluation.txt
+fi
+
+curl https://raw.githubusercontent.com/dbpedia-spotlight/model-quickstarter/master/model_readme.txt > $TARGET_DIR/README.txt
+curl "$WIKI_MIRROR/${LANGUAGE}wiki/latest/${LANGUAGE}wiki-latest-pages-articles.xml.bz2-rss.xml" | grep link | sed -e 's/^.*<link>//' -e 's/<[/]link>.*$//' | uniq >> $TARGET_DIR/README.txt
+
+
+echo "Collecting data..."
+cd $BASE_DIR
+mkdir -p data/$LANGUAGE && mv $WDIR/*Counts data/$LANGUAGE
+gzip $WDIR/*.nt &
+
+set +e