# Configuration for the separate Wikidata extraction run.
#
# Prerequisite: make sure the .WikidataRawExtractor has already been run, and
# that the redirect script has been run on wikidata_raw_unredirected before that!
# NOTE(review): .WikidataRawExtractor is still listed in extractors.wikidata
# below -- confirm whether it is meant to run again as part of this config.
#
# Format notes: one key=value pair per line; comments must be on their own
# line (text after a value is treated as part of the value).

####################################
# Download and extraction target dir.
#base-dir= moved to $extraction-framework/core/src/main/resources/universal.properties

# Source file. If the source file name ends with .gz or .bz2, it is unzipped on the fly.
# Must exist in the directory xxwiki/yyyymmdd and have the prefix xxwiki-yyyymmdd-
# where xx is the wiki code and yyyymmdd is the dump date.
# default:
#source= moved to $extraction-framework/core/src/main/resources/universal.properties

# Use only directories that contain a 'download-complete' file? Default is false.
require-download-complete=true

# List of languages or article count ranges, e.g. 'en,de,fr' or '10000-20000' or '10000-', or '@mappings'.
languages=wikidata

# Extractor class names starting with "." are prefixed by "org.dbpedia.extraction.mappings".
# Values are comma-separated lists with no spaces between entries.
extractors=.PageIdExtractor,.RedirectExtractor,.RevisionIdExtractor,.ProvenanceExtractor,.WikiPageLengthExtractor

# Extractors applied for the "wikidata" language code.
# Each extractor is listed once (a duplicate .WikidataLabelExtractor entry was removed).
extractors.wikidata=.WikidataLexemeExtractor,.WikidataR2RExtractor,.WikidataRawExtractor,.WikidataReferenceExtractor,.WikidataAliasExtractor,.WikidataLabelExtractor,.WikidataNameSpaceSameAsExtractor,.WikidataPropertyExtractor,.WikidataDescriptionExtractor,.WikidataSameAsExtractor