Skip to content
Snippets Groups Projects
Commit ba111868 authored by Your Name's avatar Your Name
Browse files
parents 973094a3 fb4423da
No related branches found
No related tags found
No related merge requests found
Showing
with 401 additions and 0 deletions
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>infobox-properties</artifactId>
<packaging>jar</packaging>
</project>
# infobox-property-definitions dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>infobox-property-definitions</artifactId>
<packaging>jar</packaging>
</project>
# interlanguage-links dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>interlanguage-links</artifactId>
<packaging>jar</packaging>
</project>
# Wikipedia page title as rdfs:label
Contains rdfs:label for every resource based on Wikipedia article title
The Wikipedia page article is extracted as rdfs:label using the 2 letter language code of the Wikipedia version as RDF language tag. Only labels for Article (Main namespace) are contained. Labels for Categories are stored in [categories dataset](https://databus.dbpedia.org/dbpedia/generic/categories).
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>labels</artifactId>
<packaging>jar</packaging>
<properties>
<databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/LabelExtractor.scala</databus.codeReference>
</properties>
</project>
# page dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>page</artifactId>
<packaging>jar</packaging>
</project>
# persondata dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>persondata</artifactId>
<packaging>jar</packaging>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- the super-pom deactivates software compilation and configures the plugin to run in default phases -->
<parent>
<groupId>org.dbpedia.databus</groupId>
<artifactId>super-pom</artifactId>
<version>1.3-SNAPSHOT</version>
</parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<packaging>pom</packaging>
<version>2020.02.01</version>
<modules>
<!--module>anchor-text</module-->
<module>article-templates</module>
<module>categories</module>
<module>citations</module>
<module>commons-sameas-links</module>
<module>disambiguations</module>
<module>external-links</module>
<module>geo-coordinates</module>
<module>homepages</module>
<module>infobox-properties</module>
<module>infobox-property-definitions</module>
<module>interlanguage-links</module>
<module>labels</module>
<module>page</module>
<module>persondata</module>
<module>redirects</module>
<module>revisions</module>
<module>topical-concepts</module>
<module>wikilinks</module>
<module>wikipedia-links</module>
</modules>
<properties>
<databus.codeReference>https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/</databus.codeReference>
<databus.issueTracker>https://github.com/dbpedia/extraction-framework/issues</databus.issueTracker>
<databus.documentationLocation>https://github.com/dbpedia/databus-maven-plugin/blob/master/dbpedia/${project.groupId}/${project.artifactId}</databus.documentationLocation>
<databus.feedbackChannel>https://forum.dbpedia.org/c/data/databus/14</databus.feedbackChannel>
<databus.tryVersionAsIssuedDate>true</databus.tryVersionAsIssuedDate>
<databus.packageDirectory>
/media/bigone/25TB/www/downloads.dbpedia.org/repo/lts/${project.groupId}/${project.artifactId}
</databus.packageDirectory>
<databus.downloadUrlPath>
https://downloads.dbpedia.org/repo/lts/${project.groupId}/${project.artifactId}/${project.version}/
</databus.downloadUrlPath>
<databus.publisher>https://webid.dbpedia.org/webid.ttl#this</databus.publisher>
<databus.license>http://purl.oclc.org/NET/rdflicense/cc-by3.0</databus.license>
<databus.documentation><![CDATA[
## Attribution fulfilled by
* (when deriving another dataset and releasing to the Databus) adding the Databus link to the provenance https://databus.dbpedia.org/dbpedia/${project.groupId}/${project.artifactId}/${project.artifactId}/${project.version}
* on your website:
* include the DBpedia logo and mention the usage of DBpedia with this link: https://databus.dbpedia.org/dbpedia
* include backlinks from your pages to the individual entities, e.g. http://dbpedia.org/resource/Berlin
* in academic publications cite: DBpedia - A Large-scale, Multilingual Knowledge Base Extracted from Wikipedia, J. Lehmann, R. Isele, M. Jakob, A. Jentzsch, D. Kontokostas, P. Mendes, S. Hellmann, M. Morsey, P. van Kleef, S. Auer, and C. Bizer. Semantic Web Journal 6 (2): 167--195 (2015)
## How to contribute
DBpedia is a community project, help us with this dataset:
* improve the software at: https://github.com/dbpedia/extraction-framework
* improve this documentation in the pom.xml at
https://github.com/dbpedia/databus-maven-plugin/tree/master/dbpedia/generic/pom.xml
## Origin
This dataset was extracted using the wikipedia-dumps available on https://dumps.wikimedia.org/
using the DBpedia Extraction-Framework available at https://github.com/dbpedia/extraction-framework
For more technical information on how these datasets were generated, please visit http://dev.dbpedia.org
## Changelog
**2019.09.01**
* fixed URI encoding and produce more valid triples
**2019.08.01 -- skipped**
* no release, downtime due to maintenance and integration of fixes
**2018.12.14 - 2019.07.01**
* Debugging period, acii2uni form before messed up URI encoding
* Implemented [new ci test approach](https://forum.dbpedia.org/t/new-ci-tests-on-dbpedia-releases/77)
**2018.08.15 - 2018.12.14**
* were created as new modular releases, some issues remain:
* language normalisation to iso codes, zh-min-nan to nan, zh-yue to yue, bat-smg to batsmg (no iso code available)
* we used rapper 2.0.14 to parse and `LC_ALL=C sort` to sort and ascii2uni -a U to unescape unicdoe
characters
* link to Wikimedia dump version is missing
]]></databus.documentation>
<!-- used for derive plugin -->
<databus.deriveversion>2019.10.01</databus.deriveversion>
</properties>
<!-- currently still needed to find the super-pom, once the super-pom is in maven central,
this can be removed as well -->
<repositories>
<repository>
<id>archiva.internal</id>
<name>Internal Release Repository</name>
<url>http://databus.dbpedia.org:8081/repository/internal</url>
</repository>
<repository>
<id>archiva.snapshots</id>
<name>Internal Snapshot Repository</name>
<url>http://databus.dbpedia.org:8081/repository/snapshots</url>
<snapshots>
<updatePolicy>always</updatePolicy>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.dbpedia.databus</groupId>
<artifactId>databus-derive-maven-plugin</artifactId>
<version>1.0-SNAPSHOT</version>
<executions>
<execution>
<id>DeriveFromMarvin</id>
<!--phase>initialize</phase-->
<goals>
<goal>clone</goal>
</goals>
</execution>
</executions>
<configuration>
<skipParsing>false</skipParsing>
<skipCloning>false</skipCloning>
<!-- query
PREFIX dataid: <http://dataid.dbpedia.org/ns/core#>
SELECT distinct (?derive) WHERE {
?dataset dataid:group <https://databus.dbpedia.org/marvin/generic> .
?dataset dataid:artifact ?artifact .
?dataset dataid:version ?version .
?dataset dct:hasVersion "2019.08.30"^^xsd:string
BIND (CONCAT("<version>",?artifact,"/${databus.deriveversion}</version>") as ?derive)
}
order by asc(?derive)
-->
<versions>
<version>https://databus.dbpedia.org/marvin/generic/article-templates/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/categories/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/citations/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/commons-sameas-links/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/disambiguations/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/external-links/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/geo-coordinates/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/homepages/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/infobox-properties/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/infobox-property-definitions/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/interlanguage-links/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/labels/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/page/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/persondata/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/redirects/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/revisions/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/topical-concepts/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/wikilinks/${databus.deriveversion}</version>
<version>https://databus.dbpedia.org/marvin/generic/wikipedia-links/${databus.deriveversion}</version>
</versions>
</configuration>
</plugin>
</plugins>
<extensions>
<extension>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-webdav-jackrabbit</artifactId>
<version>3.0.0</version>
</extension>
</extensions>
</build>
<profiles>
<profile>
<id>webdav</id>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>wagon-maven-plugin</artifactId>
<version>2.0.0</version>
<executions>
<execution>
<id>upload-databus</id>
<phase>install</phase>
<goals>
<goal>upload</goal>
</goals>
<configuration>
<fromDir>${project.build.directory}/databus/repo/${project.groupId}/${project.artifactId}</fromDir>
<url>dav:https://downloads.dbpedia.org/repo/</url>
<toDir>dbpedia/${project.groupId}/${project.artifactId}</toDir>
<serverId>downloads-dbpedia-org</serverId>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>redirects</artifactId>
<packaging>jar</packaging>
</project>
# redirects dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>revisions</artifactId>
<packaging>jar</packaging>
</project>
# revisions dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>topical-concepts</artifactId>
<packaging>jar</packaging>
</project>
# topical-concepts dataset
help needed
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>generic</groupId>
<artifactId>group-metadata</artifactId>
<version>2020.02.01</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>generic</groupId>
<artifactId>wikilinks</artifactId>
<packaging>jar</packaging>
</project>
# wikilinks dataset
help needed
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment