Skip to content
Snippets Groups Projects
Commit ba111868 authored by Your Name's avatar Your Name
Browse files
parents 973094a3 fb4423da
No related branches found
No related tags found
No related merge requests found
Showing
with 200 additions and 1 deletion
databus-maven-plugin/
databus-poms/dbpedia/*/*/*/
extraction-framework/
wikidumps/
logs/
......
# Anchor text of Wikilinks
Human-readable text (anchor text) of Wikilinks that refer to other Wikipedia articles
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "anchor-text" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!--
    Parent version aligned with the other artifacts of the "generic" group,
    which all inherit group-metadata 2020.02.01 (this POM previously pointed
    at 2018.08.15).
    NOTE(review): confirm that this artifact is meant to follow the same
    release as its siblings.
  -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>anchor-text</artifactId>
  <packaging>jar</packaging>

  <properties>
    <!--
      URL of the source file referenced for this artifact (AnchorTextExtractor).
      NOTE(review): verify that the URL resolves; the path has no
      src/main/scala segment.
    -->
    <databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/org/dbpedia/extraction/mappings/AnchorTextExtractor.scala</databus.codeReference>
  </properties>
</project>
# Used Wikipedia Templates per Article
Dataset contains identifiers for used templates per Wikipedia article using the wikiPageUsesTemplate property
Template identifiers (objects) use prefix `http://dbpedia.org/resource/Template:` followed by template name.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "article-templates" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>article-templates</artifactId>
  <packaging>jar</packaging>

  <properties>
    <!--
      URL of the source file referenced for this artifact
      (ArticleTemplatesExtractor).
      NOTE(review): verify that the URL resolves; the path has no
      src/main/scala segment.
    -->
    <databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/org/dbpedia/extraction/mappings/ArticleTemplatesExtractor.scala</databus.codeReference>
  </properties>
</project>
# Wikipedia Article Categories and categories metadata
Contains the Wikipedia categories per Article and category metadata (hierarchy and labels)
The dataset is split into 3 different types of files. The `_articles` files contain the Wikipedia categories assigned per article connected via `dct:subject`. The `skos` files partially describe the Wikipedia Category system modelled with the [SKOS vocabulary](https://www.w3.org/2004/02/skos/). For each category, `skos:prefLabel`, parent categories (using `skos:broader`), and `skos:related` categories are extracted. The `labels` files contain the `rdfs:label` of the category resources.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "categories" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
  NOTE(review): no databus.codeReference property is set here, unlike most
  sibling artifacts; confirm whether that is intentional.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>categories</artifactId>
  <packaging>jar</packaging>
</project>
# Citations and References
Contains citations from articles and bibliographic information of the references
The `_links` files contain the citations of one article by linking the citation URL via the `isCitedBy` property. The `_data` files contain bibliographic metadata (e.g. format, title, author, website, accessdate, etc. if available) for the citations.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "citations" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>citations</artifactId>
  <packaging>jar</packaging>

  <properties>
    <!--
      URL of the source file referenced for this artifact (CitationExtractor).
      NOTE(review): verify that the URL resolves; the path has no
      src/main/scala segment.
    -->
    <databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/org/dbpedia/extraction/mappings/CitationExtractor.scala</databus.codeReference>
  </properties>
</project>
# Commons sameAs Links
Links non-commons DBpedia resources to their DBpedia Commons counterparts
Extracts `owl:sameAs` to Commons Resources based on `{{Commons}}` template.
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "commons-sameas-links" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>commons-sameas-links</artifactId>
  <packaging>jar</packaging>

  <properties>
    <!--
      URL of the source file referenced for this artifact
      (CommonsResourceExtractor).
      NOTE(review): verify that the URL resolves; the path has no
      src/main/scala segment.
    -->
    <databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/org/dbpedia/extraction/mappings/CommonsResourceExtractor.scala</databus.codeReference>
  </properties>
</project>
# Entity Disambiguation Links
Disambiguation Links extracted from Wikipedia disambiguation pages
The links are extracted from [Wikipedia disambiguation](http://en.wikipedia.org/wiki/Wikipedia:Disambiguation) pages. Since Wikipedia has no syntax to distinguish disambiguation links from ordinary links, heuristics are used.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "disambiguations" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>disambiguations</artifactId>
  <packaging>jar</packaging>

  <properties>
    <!--
      URL of the source file referenced for this artifact
      (DisambiguationExtractor).
      NOTE(review): verify that the URL resolves; the path has no
      src/main/scala segment.
    -->
    <databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/org/dbpedia/extraction/mappings/DisambiguationExtractor.scala</databus.codeReference>
  </properties>
</project>
# External links
Links extracted from Wikitext to external web pages
Every link spotted in the Wikitext of a resource will be added via the `wikiPageExternalLink` property.
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "external-links" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>external-links</artifactId>
  <packaging>jar</packaging>

  <properties>
    <!--
      URL of the source file referenced for this artifact
      (ExternalLinksExtractor).
      NOTE(review): verify that the URL resolves; the path has no
      src/main/scala segment.
    -->
    <databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/org/dbpedia/extraction/mappings/ExternalLinksExtractor.scala</databus.codeReference>
  </properties>
</project>
# geo-coordinates dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "geo-coordinates" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
  NOTE(review): no databus.codeReference property is set here, unlike most
  sibling artifacts; confirm whether that is intentional.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>geo-coordinates</artifactId>
  <packaging>jar</packaging>
</project>
# homepages dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven POM for the "homepages" artifact of the "generic" group.
  Settings not declared here are inherited from the group-metadata parent.
  NOTE(review): no databus.codeReference property is set here, unlike most
  sibling artifacts; confirm whether that is intentional.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Parent POM shared by the artifacts of this group. -->
  <parent>
    <groupId>generic</groupId>
    <artifactId>group-metadata</artifactId>
    <version>2020.02.01</version>
  </parent>

  <groupId>generic</groupId>
  <artifactId>homepages</artifactId>
  <packaging>jar</packaging>
</project>
# Extracted facts from Wikipedia Infoboxes
Data from Wikipedia's infoboxes, as is with some smart automatic (generic) parsing. This dataset has the best fact coverage, but has less consistency than the cleaned mapping infobox dataset.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment