Newer
Older
# This configuration is used for a separate extraction run on Wikidata.
# Prerequisites: the .WikidataRawExtractor must already have been run, and the redirect
# script must have been run on wikidata_raw_unredirected, before starting this extraction.
####################################
# download and extraction target dir
# base-dir is no longer set here; it moved to $extraction-framework/core/src/main/resources/universal.properties
# Source file. If source file name ends with .gz or .bz2, it is unzipped on the fly.
# Must exist in the directory xxwiki/yyyymmdd and have the prefix xxwiki-yyyymmdd-
# where xx is the wiki code and yyyymmdd is the dump date.
# source is no longer set here; the key and its default moved to
# $extraction-framework/core/src/main/resources/universal.properties
# If true, use only directories that contain a 'download-complete' file. Default is false.
require-download-complete=true
# Comma-separated list of languages or article count ranges,
# e.g. 'en,de,fr' or '10000-20000' or '10000-', or '@mappings'.
# This configuration selects only 'wikidata'.
languages=wikidata
# Extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings".
# Comma-separated list of extractor classes.
extractors=.PageIdExtractor,.RedirectExtractor,.RevisionIdExtractor,.ProvenanceExtractor,.WikiPageLengthExtractor
# Extractor list keyed by the 'wikidata' language code. Each class is listed exactly once:
# a duplicate .WikidataLabelExtractor entry was removed; the order of the rest is unchanged.
extractors.wikidata=.WikidataLexemeExtractor,.WikidataR2RExtractor,.WikidataRawExtractor,.WikidataReferenceExtractor,.WikidataAliasExtractor,.WikidataLabelExtractor,.WikidataNameSpaceSameAsExtractor,.WikidataPropertyExtractor,.WikidataDescriptionExtractor,.WikidataSameAsExtractor