Skip to content
Snippets Groups Projects
Commit 860e468e authored by Sebastian Hellmann
Browse files

new options for abstracts

parent c81a3e2f
No related branches found
No related tags found
No related merge requests found
...@@ -8,8 +8,7 @@ base-url=http://dumps.wikimedia.your.org/ ...@@ -8,8 +8,7 @@ base-url=http://dumps.wikimedia.your.org/
# source=pages-articles.xml.bz2 # source=pages-articles.xml.bz2
# languages to download # languages to download
languages=en languages=en,af,als,am,an,ar,arz,ast,azb,az,ba,bar,bat-smg,be,bg,bn,bpy,br,bs,bug,ca,cdo,ceb,ce,ckb,cs,cv,cy,da,de,el,eml,eo,es,et,eu,fa,fi,fo,fr,fy,ga,gd,gl,gu,he,hi,hr,hsb,ht,hu,hy,ia,id,ilo,io,is,it,ja,jv,ka,kk,kn,ko,ku,ky,la,lb,li,lmo,lt,lv,mai,mg,mhr,min,mk,ml,mn,mrj,mr,ms,my,mzn,nap,nds,ne,new,nl,nn,no,oc,or,os,pa,pl,pms,pnb,pt,qu,ro,ru,sah,sa,scn,sco,sd,sh,si,simple,sk,sl,sq,sr,su,sv,sw,ta,te,tg,th,tl,tr,tt,uk,ur,uz,vec,vi,vo,wa,war,wuu,xmf,yi,yo,zh,zh-min-nan,zh-yue
#,af,als,am,an,ar,arz,ast,azb,az,ba,bar,bat-smg,be,bg,bn,bpy,br,bs,bug,ca,cdo,ceb,ce,ckb,cs,cv,cy,da,de,el,eml,eo,es,et,eu,fa,fi,fo,fr,fy,ga,gd,gl,gu,he,hi,hr,hsb,ht,hu,hy,ia,id,ilo,io,is,it,ja,jv,ka,kk,kn,ko,ku,ky,la,lb,li,lmo,lt,lv,mai,mg,mhr,min,mk,ml,mn,mrj,mr,ms,my,mzn,nap,nds,ne,new,nl,nn,no,oc,or,os,pa,pl,pms,pnb,pt,qu,ro,ru,sah,sa,scn,sco,sd,sh,si,simple,sk,sl,sq,sr,su,sv,sw,ta,te,tg,th,tl,tr,tt,uk,ur,uz,vec,vi,vo,wa,war,wuu,xmf,yi,yo,zh,zh-min-nan,zh-yue
# Unzip files while downloading? Not necessary, extraction will unzip on the fly. Let's save space. # Unzip files while downloading? Not necessary, extraction will unzip on the fly. Let's save space.
unzip=false unzip=false
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#log-dir= see: ../core/src/main/resources/universal.properties #log-dir= see: ../core/src/main/resources/universal.properties
# WikiPages that failed to extract on the first try can be retried with this option (especially useful when extracting from the MediaWiki API) # WikiPages that failed to extract on the first try can be retried with this option (especially useful when extracting from the MediaWiki API)
retry-failed-pages=false #retry-failed-pages=true
# Source file. If source file name ends with .gz or .bz2, it is unzipped on the fly. # Source file. If source file name ends with .gz or .bz2, it is unzipped on the fly.
# Must exist in the directory xxwiki/yyyymmdd and have the prefix xxwiki-yyyymmdd- # Must exist in the directory xxwiki/yyyymmdd and have the prefix xxwiki-yyyymmdd-
...@@ -23,12 +23,13 @@ require-download-complete=false ...@@ -23,12 +23,13 @@ require-download-complete=false
# List of languages or article count ranges, e.g. 'en,de,fr' or '10000-20000' or '10000-', or '@mappings' # List of languages or article count ranges, e.g. 'en,de,fr' or '10000-20000' or '10000-', or '@mappings'
# NOTE sync with minidumps # NOTE sync with minidumps
languages=af,als,am,an,arz,ast,azb,ba,bar,bat-smg,bpy,br,bs,bug,cdo,ce,ceb,ckb,cv,fo,fy,gd,he,hsb,ht,ia,ilo,io,is,jv,ka,kn,ku,ky,la,lb,li,lmo,mai,mg,min,ml,mn,mr,mrj,ms,mt,my,mzn,nah,nap,nds,ne,new,nn,no,oc,or,os,pa,pms,pnb,qu,sa,sah,scn,sco,sh,si,simple,sq,su,sw,ta,te,tg,th,tl,tt,uz,vec,wa,xmf,yo,zh-min-nan,zh-yue languages=en,af,als,am,an,ar,arz,ast,azb,az,ba,bar,bat-smg,be,bg,bn,bpy,br,bs,bug,ca,cdo,ceb,ce,ckb,cs,cv,cy,da,de,el,eml,eo,es,et,eu,fa,fi,fo,fr,fy,ga,gd,gl,gu,he,hi,hr,hsb,ht,hu,hy,ia,id,ilo,io,is,it,ja,jv,ka,kk,kn,ko,ku,ky,la,lb,li,lmo,lt,lv,mai,mg,mhr,min,mk,ml,mn,mrj,mr,ms,my,mzn,nap,nds,ne,new,nl,nn,no,oc,or,os,pa,pl,pms,pnb,pt,qu,ro,ru,sah,sa,scn,sco,sd,sh,si,simple,sk,sl,sq,sr,su,sv,sw,ta,te,tg,th,tl,tr,tt,uk,ur,uz,vec,vi,vo,wa,war,wuu,xmf,yi,yo,zh,zh-min-nan,zh-yue
# default namespaces: Main, File, Category, Template # default namespaces: Main, File, Category, Template
# we only want abstracts for articles -> only main namespace # we only want abstracts for articles -> only main namespace
namespaces=Main #namespaces=Main
# extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings" # extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings"
parallel-processes=2
extractors=.NifExtractor extractors=.NifExtractor
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment