diff --git a/databus-poms/dbpedia/text/equations/equations.md b/databus-poms/dbpedia/text/equations/equations.md new file mode 100644 index 0000000000000000000000000000000000000000..c697cc58d93a6af0a53a40967061a84633534098 --- /dev/null +++ b/databus-poms/dbpedia/text/equations/equations.md @@ -0,0 +1,2 @@ +# Equations in Wikipedia articles +Equations extracted from the Wikipedia articles. diff --git a/databus-poms/dbpedia/text/equations/pom.xml b/databus-poms/dbpedia/text/equations/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..1836b80e608802ad937d407e8e2316c9eb834a61 --- /dev/null +++ b/databus-poms/dbpedia/text/equations/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>equations</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>Equations@en</label> + </labels> + <datasetDescription>Equations extracted from the Wikipedia articles.</datasetDescription> + </configuration> + </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/long-abstracts/long-abstracts.md b/databus-poms/dbpedia/text/long-abstracts/long-abstracts.md new file mode 100644 index 0000000000000000000000000000000000000000..1012317486e5dc9e9834957491537dc3b302ed03 --- /dev/null +++ b/databus-poms/dbpedia/text/long-abstracts/long-abstracts.md @@ -0,0 +1,4 @@ +# Long abstracts from Wikipedia articles +The text before the table of contents of 
Wikipedia articles + +Using the property `dbo:abstract` all content before the table of contents is extracted. diff --git a/databus-poms/dbpedia/text/long-abstracts/pom.xml b/databus-poms/dbpedia/text/long-abstracts/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..b390aa1bf0142ce22b087fc9b18d2d56f28dca36 --- /dev/null +++ b/databus-poms/dbpedia/text/long-abstracts/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>long-abstracts</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>Long Abstracts@en</label> + </labels> + <datasetDescription>Long abstracts (full abstracts) of Wikipedia articles, usually the first section, before table of contents as rdfs:comment.</datasetDescription> + </configuration> + </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/nif-context/nif-context.md b/databus-poms/dbpedia/text/nif-context/nif-context.md new file mode 100644 index 0000000000000000000000000000000000000000..cc6e85ac8f8a0a97ad41e4ba99eb48858eb5e049 --- /dev/null +++ b/databus-poms/dbpedia/text/nif-context/nif-context.md @@ -0,0 +1,2 @@ +# Wikipedia Articles Text +The text/content of the Wikipedia articles. 
diff --git a/databus-poms/dbpedia/text/nif-context/pom.xml b/databus-poms/dbpedia/text/nif-context/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..5a577ca8f7e495a07699a360b637026eab6f17df --- /dev/null +++ b/databus-poms/dbpedia/text/nif-context/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>nif-context</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>NIF Context@en</label> + </labels> + <datasetDescription>The text/contents of the Wikipedia article.</datasetDescription> + </configuration> + </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/nif-page-structure/nif-page-structure.md b/databus-poms/dbpedia/text/nif-page-structure/nif-page-structure.md new file mode 100644 index 0000000000000000000000000000000000000000..76d0136574415665d988f2d20f7475f3944bc6cf --- /dev/null +++ b/databus-poms/dbpedia/text/nif-page-structure/nif-page-structure.md @@ -0,0 +1,2 @@ +# Wikipedia Articles Structure +The structure of the Wikipedia article including sections, sub-sections and paragraphs. 
diff --git a/databus-poms/dbpedia/text/nif-page-structure/pom.xml b/databus-poms/dbpedia/text/nif-page-structure/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..c18d650ee2538453fb027b18eac09795ae841250 --- /dev/null +++ b/databus-poms/dbpedia/text/nif-page-structure/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>nif-page-structure</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>NIF Page Structure@en</label> + </labels> + <datasetDescription>The structure of the Wikipedia article including sections, sub-sections and paragraphs.</datasetDescription> + </configuration> + </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/nif-text-links/nif-text-links.md b/databus-poms/dbpedia/text/nif-text-links/nif-text-links.md new file mode 100644 index 0000000000000000000000000000000000000000..ba57fdca543a3d5dbc3c394b7356cfff1852f3e8 --- /dev/null +++ b/databus-poms/dbpedia/text/nif-text-links/nif-text-links.md @@ -0,0 +1,2 @@ +# Links in Wikipedia articles +The links found in the Wikipedia articles. 
diff --git a/databus-poms/dbpedia/text/nif-text-links/pom.xml b/databus-poms/dbpedia/text/nif-text-links/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..05f88d5311f1cf596413a60abdf547c8d1e7a9ae --- /dev/null +++ b/databus-poms/dbpedia/text/nif-text-links/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>nif-text-links</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>NIF Text Links@en</label> + </labels> + <datasetDescription>The links found in the Wikipedia articles.</datasetDescription> + </configuration> + </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/pom.xml b/databus-poms/dbpedia/text/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..f3f68d3a267784b8a9db43eb392dc495294d1cbf --- /dev/null +++ b/databus-poms/dbpedia/text/pom.xml @@ -0,0 +1,91 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <!-- the super-pom deactivates software compilation and configures the plugin to run in default phases --> + <parent> + <groupId>org.dbpedia.databus</groupId> + <artifactId>super-pom</artifactId> + <version>1.3-SNAPSHOT</version> + </parent> + 
<groupId>text</groupId> + <artifactId>group-metadata</artifactId> + <packaging>pom</packaging> + <version>2020.02.01</version> + + <modules> + <module>short-abstracts</module> + <module>long-abstracts</module> + <module>nif-context</module> + <module>nif-page-structure</module> + <module>nif-text-links</module> + <module>equations</module> + <module>raw-tables</module> + </modules> + + <properties> + <databus.tryVersionAsIssuedDate>true</databus.tryVersionAsIssuedDate> + <databus.packageDirectory> + /media/bigone/25TB/www/downloads.dbpedia.org/repo/lts/${project.groupId}/${project.artifactId} + </databus.packageDirectory> + <databus.downloadUrlPath> + https://downloads.dbpedia.org/repo/lts/${project.groupId}/${project.artifactId}/${project.version}/ + </databus.downloadUrlPath> + <databus.publisher>https://mdw.dojchinovski.github.io/webid.ttl#this</databus.publisher> + <!-- moved to settings.xml + databus.pkcs12File>${user.home}/.m2/certificate_generic.pfx</databus.pkcs12File--> + <databus.maintainer>https://mdw.dojchinovski.github.io/webid.ttl#this</databus.maintainer> + <databus.license>http://purl.oclc.org/NET/rdflicense/cc-by-sa3.0</databus.license> + <databus.documentation><![CDATA[ +## Attribution fulfilled by +* (when deriving another dataset and releasing to the Databus) adding the Databus link to the provenance https://databus.dbpedia.org/dbpedia/${project.groupId}/${project.artifactId}/${project.artifactId}/${project.version} +* on your website: + * include the DBpedia logo and mention the usage of DBpedia with this link: https://databus.dbpedia.org/dbpedia + * include backlinks from your pages to the individual entities, e.g. http://dbpedia.org/resource/Berlin +* in academic publications cite: DBpedia - A Large-scale, Multilingual Knowledge Base Extracted from Wikipedia, J. Lehmann, R. Isele, M. Jakob, A. Jentzsch, D. Kontokostas, P. Mendes, S. Hellmann, M. Morsey, P. van Kleef, S. Auer, and C. Bizer. 
Semantic Web Journal 6 (2): 167--195 (2015) + + +## How to contribute +DBpedia is a community project, help us with this dataset: +* improve this documentation at https://github.com/dbpedia/databus-maven-plugin/tree/master/dbpedia/mappings/${project.artifactId}/${project.artifactId}.md + +## Origin +This dataset was extracted using the wikipedia-dumps available on https://dumps.wikimedia.org/ +using the DBpedia Extraction-Framework available at https://github.com/dbpedia/extraction-framework +For more technical information on how these datasets were generated, please visit http://dev.dbpedia.org + +## Issues +* documentation is too unspecific, the text extraction needs a more detailed description and better links +* there seem to be invalid characters in the dataid file, however, the loading and rdf parsing work fine + +# Changelog +## 2016.10.01 +* old dataset from 2016 +* quite old, but work on new releases is on its way +* mimetype is detected by a parse over the 10-line preview, line by line. If no wrong triples are found, we assume that it is ntriples, which is a subset of turtle, however, this fails here, sometimes. 
+* normalisation of langs: zh_yue to yue, zh_min_nan to nan, bat_smg to batsmg +* we used rapper 2.0.14 to parse and `LC_ALL=C sort` to sort and ascii2uni -a U to unescape unicode +characters +* link to Wikimedia dump version is missing + ]]></databus.documentation> + </properties> + + <!-- currently still needed to find the super-pom, once the super-pom is in maven central, + this can be removed as well --> + <repositories> + <repository> + <id>archiva.internal</id> + <name>Internal Release Repository</name> + <url>http://databus.dbpedia.org:8081/repository/internal</url> + </repository> + <repository> + <id>archiva.snapshots</id> + <name>Internal Snapshot Repository</name> + <url>http://databus.dbpedia.org:8081/repository/snapshots</url> + <snapshots> + <updatePolicy>always</updatePolicy> + </snapshots> + </repository> + </repositories> + +</project> diff --git a/databus-poms/dbpedia/text/raw-tables/pom.xml b/databus-poms/dbpedia/text/raw-tables/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..992b28c5e34453189ebec56411f5bed6fd9a551e --- /dev/null +++ b/databus-poms/dbpedia/text/raw-tables/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>raw-tables</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>Raw Tables@en</label> + </labels> + <datasetDescription>Raw tables extracted from the Wikipedia articles.</datasetDescription> + </configuration> 
+ </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/raw-tables/raw-tables.md b/databus-poms/dbpedia/text/raw-tables/raw-tables.md new file mode 100644 index 0000000000000000000000000000000000000000..11b67beef2428ebe7144da1075b8f4a041e185ca --- /dev/null +++ b/databus-poms/dbpedia/text/raw-tables/raw-tables.md @@ -0,0 +1,2 @@ +# Tables from Wikipedia articles +Raw tables extracted from the Wikipedia articles. diff --git a/databus-poms/dbpedia/text/short-abstracts/pom.xml b/databus-poms/dbpedia/text/short-abstracts/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..38980170aa143ef57cde5ae3cac5069c27b2a53b --- /dev/null +++ b/databus-poms/dbpedia/text/short-abstracts/pom.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>text</groupId> + <version>2020.02.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>short-abstracts</artifactId> + <groupId>text</groupId> + <packaging>jar</packaging> + <version>2020.02.01</version> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-maven-plugin</artifactId> + <configuration> + <labels> + <label>Short Abstracts@en</label> + </labels> + <datasetDescription>Short Abstracts (2 or 3 sentences, about 600 characters long) of Wikipedia articles.</datasetDescription> + </configuration> + </plugin> + </plugins> + </build> + +</project> diff --git a/databus-poms/dbpedia/text/short-abstracts/short-abstracts.md b/databus-poms/dbpedia/text/short-abstracts/short-abstracts.md new file mode 100644 index 0000000000000000000000000000000000000000..258ab0cc3d3a20f29f0eb0624c54749a69ec1d8d --- /dev/null +++ 
b/databus-poms/dbpedia/text/short-abstracts/short-abstracts.md @@ -0,0 +1,5 @@ +# Shortened abstracts from Wikipedia articles +The text before the table of contents of Wikipedia articles is shortened to approx. 2-3 sentences. + +Using the property rdfs:comment all full sentences within a certain amount of characters are saved in the different files. + diff --git a/databus-poms/dbpedia/wikidata/alias/alias.md b/databus-poms/dbpedia/wikidata/alias/alias.md new file mode 100644 index 0000000000000000000000000000000000000000..d53dea4fa0ef419ab916fb60b3a15cdfc40ab380 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/alias/alias.md @@ -0,0 +1,13 @@ +# Wikidata Alias +Wikidata-specific dataset containing aliases for languages in the mappings wiki. Aliases for languages not in the mapping wiki are found in the _nmw variant. + +Aliases are alternative names for items that are placed in the Also known as column of the table on top of every Wikidata item page. +From https://www.wikidata.org/wiki/Help:Aliases +There can be several aliases for each item, but only one label (other dataset). + +## Issues +Currently data is grouped in two categories: +* aliases from mapping wiki languages (around 40) +* aliases from all other languages +In the future, we might separate these into one file per language, which will increase the number of files from 2 (nmw as content variant) now, to many (basically using iso codes as content variants, beyond the 120 wikipedia versions) +This is under discussion however. 
diff --git a/databus-poms/dbpedia/wikidata/alias/pom.xml b/databus-poms/dbpedia/wikidata/alias/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..407bfcad17ab7657056234756169c73f6cbcdf22 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/alias/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>alias</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/debug/debug.md b/databus-poms/dbpedia/wikidata/debug/debug.md new file mode 100644 index 0000000000000000000000000000000000000000..eb773044906c8cff4134ae8cce7f21ea0d350110 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/debug/debug.md @@ -0,0 +1,4 @@ +# Wikidata Debug Duplicate IRI Split +Debug information about duplicate IRIs and r2r mapping errors + +TODO diff --git a/databus-poms/dbpedia/wikidata/debug/pom.xml b/databus-poms/dbpedia/wikidata/debug/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..e43a353d522f83fb6e62f6d89f2a9badaf035986 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/debug/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>debug</artifactId> + <groupId>wikidata</groupId> + 
<packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/description/description.md b/databus-poms/dbpedia/wikidata/description/description.md new file mode 100644 index 0000000000000000000000000000000000000000..91268e17e4639eb54f30c1a1e65a59df0aba725e --- /dev/null +++ b/databus-poms/dbpedia/wikidata/description/description.md @@ -0,0 +1,7 @@ +# Wikidata Description +Wikidata-specific dataset containing descriptions for languages in the mappings wiki. + +Descriptions for languages not in the mappings wiki are found in the _nmw variant. + +Responsible extractor: +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/wikidata/WikidataDescriptionExtractor.scala diff --git a/databus-poms/dbpedia/wikidata/description/pom.xml b/databus-poms/dbpedia/wikidata/description/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..caf92b098e5103f1237d53f314e6e2f210bac284 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/description/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>description</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/geo-coordinates/geo-coordinates.md b/databus-poms/dbpedia/wikidata/geo-coordinates/geo-coordinates.md new file mode 100644 index 0000000000000000000000000000000000000000..9a53faf798e2c0cb7a3a647fb26bda978b615257 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/geo-coordinates/geo-coordinates.md @@ -0,0 +1,7 @@ +# Geo Coordinates +Geographic coordinates extracted 
from Wikidata + + +Some properties from Wikidata, notably P625, are transformed to wgs84 and geo. +The transformation is described here: +https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json#L92 diff --git a/databus-poms/dbpedia/wikidata/geo-coordinates/pom.xml b/databus-poms/dbpedia/wikidata/geo-coordinates/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..727b7f7caed4011705b6600b42f081f314215dd7 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/geo-coordinates/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>geo-coordinates</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/images/images.md b/databus-poms/dbpedia/wikidata/images/images.md new file mode 100644 index 0000000000000000000000000000000000000000..225bf9a2fde99e931a7f1f18eb9cc1a6a3aae6eb --- /dev/null +++ b/databus-poms/dbpedia/wikidata/images/images.md @@ -0,0 +1,7 @@ +# Wikidata Images +Images from Wikidata item + +Some properties from Wikidata, notably P18, are transformed to foaf:depiction and dbo:thumbnail. 
+The transformation is described here: +https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json#L111 + diff --git a/databus-poms/dbpedia/wikidata/images/pom.xml b/databus-poms/dbpedia/wikidata/images/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..0fddd23e7f43dc92ec6237a5fbe0fdc8981c2d5b --- /dev/null +++ b/databus-poms/dbpedia/wikidata/images/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>images</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/instance-types/instance-types.md b/databus-poms/dbpedia/wikidata/instance-types/instance-types.md new file mode 100644 index 0000000000000000000000000000000000000000..176a9eaa74210a0bed6c334baeb4931aafb2da91 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/instance-types/instance-types.md @@ -0,0 +1,31 @@ +# Wikidata Instance Types +Contains triples of the form $object rdf:type $class + +Using P31 as rdf:type . +If the typed class is in the DBpedia Ontology, it will be used, otherwise the type is discarded. +E.g. + +``` +<QXYZ> <P31> <Q5> . # Q5 is Person +<Q5> owl:equivalentClass dbo:Person . +------------------------ +<QXYZ> rdf:type dbo:Person . +``` +Function used: +``` + "P31": [ + { + "rdf:type": "$getDBpediaClass" + } +], + +``` + +The extractor uses the data from ontology-subclass-of artifact to optimize the hierarchy and enrich equivalent classes. 
+ +The mappings between Wikidata Items and classes can be edited in the [Mappings Wiki](http://mappings.dbpedia.org/index.php/OntologyClass:Person) + +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json#L92 +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json#L87 + + diff --git a/databus-poms/dbpedia/wikidata/instance-types/pom.xml b/databus-poms/dbpedia/wikidata/instance-types/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..6c35e03427c0098af9569228ebdf10bef7593c65 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/instance-types/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>instance-types</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/labels/labels.md b/databus-poms/dbpedia/wikidata/labels/labels.md new file mode 100644 index 0000000000000000000000000000000000000000..68c727a54cce298f2761b7106f5a8b40d867e745 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/labels/labels.md @@ -0,0 +1,5 @@ +# Wikidata Labels +Contains the name in all the languages in wikidata. + +For each language only one label is used, more labels in the alias artifact. +Names in the languages not available in the mappings wiki are found in the _nmw variant. 
diff --git a/databus-poms/dbpedia/wikidata/labels/pom.xml b/databus-poms/dbpedia/wikidata/labels/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..a768f051aee9271a8ff3297fcab30376ba9ed6ba --- /dev/null +++ b/databus-poms/dbpedia/wikidata/labels/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>labels</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/mappingbased-literals/mappingbased-literals.md b/databus-poms/dbpedia/wikidata/mappingbased-literals/mappingbased-literals.md new file mode 100644 index 0000000000000000000000000000000000000000..a9ecb72f53360e7a55dbf116e04597564fb448bc --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-literals/mappingbased-literals.md @@ -0,0 +1,16 @@ +# Wikidata Mappingbased Literals +High-quality data extracted using the mapping-based extraction (Literal properties only). + +The dump contains only dbo properties. 
+If there is an owl:equivalentProperty from a Wikidata property to a DBO property, the property is replaced + +Errors from here are recorded in the debug artifact (object vs datatype) + +Improvement can be most effectively done by adding more equivalent class mappings to http://mapping.dbpedia.org + +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/wikidata/WikidataR2RExtractor.scala +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json + + +## Issue +* unclear, whether there are literals in wikidata that need cleaning diff --git a/databus-poms/dbpedia/wikidata/mappingbased-literals/pom.xml b/databus-poms/dbpedia/wikidata/mappingbased-literals/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..27ed6c07be6b18bc1e729f466a379de1c5815c47 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-literals/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>mappingbased-literals</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/mappingbased-objects-uncleaned/mappingbased-objects-uncleaned.md b/databus-poms/dbpedia/wikidata/mappingbased-objects-uncleaned/mappingbased-objects-uncleaned.md new file mode 100644 index 0000000000000000000000000000000000000000..57d4d46170f929cda2cea7d3312c716fe67b9ff0 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-objects-uncleaned/mappingbased-objects-uncleaned.md @@ -0,0 +1,12 @@ +# Mappingbased Objects uncleaned 
+High-quality data extracted using the mapping-based extraction (Object properties only). + +The dump contains only dbo properties, which are mapped by this json file, using patterns: +https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json + + +If there is an owl:equivalentProperty from a Wikidata property to a DBO property, the property is replaced. +Errors from here are recorded in the debug artifact (object vs datatype) + +Improvement can be most effectively done by adding more equivalent property mappings to http://mapping.dbpedia.org +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/wikidata/WikidataR2RExtractor.scala diff --git a/databus-poms/dbpedia/wikidata/mappingbased-objects-uncleaned/pom.xml b/databus-poms/dbpedia/wikidata/mappingbased-objects-uncleaned/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..457144d19a192fd0f026a2d19d0df006c18b0fa5 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-objects-uncleaned/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>mappingbased-objects-uncleaned</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/mappingbased-objects/mappingbased-objects.md b/databus-poms/dbpedia/wikidata/mappingbased-objects/mappingbased-objects.md new file mode 100644 index 0000000000000000000000000000000000000000..efefe8aa22652dfad3d8ef3f33a3ca988fdc6ba9 --- /dev/null +++ 
b/databus-poms/dbpedia/wikidata/mappingbased-objects/mappingbased-objects.md @@ -0,0 +1,12 @@ +# Mappingbased Objects +High-quality data extracted using the mapping-based extraction (Object properties only). + +The dump contains only dbo properties, which are mapped by this json file, using patterns: +https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json + + +If there is an owl:equivalentProperty from a Wikidata property to a DBO property, the property is replaced. +Errors from here are recorded in the debug artifact (object vs datatype) + +Improvement can be most effectively done by adding more equivalent property mappings to http://mappings.dbpedia.org +* https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/wikidata/WikidataR2RExtractor.scala diff --git a/databus-poms/dbpedia/wikidata/mappingbased-objects/pom.xml b/databus-poms/dbpedia/wikidata/mappingbased-objects/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..64fe051c7690b72c641166b0a65cd65ab97cf295 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-objects/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>mappingbased-objects</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/mappingbased-properties-reified/mappingbased-properties-reified.md b/databus-poms/dbpedia/wikidata/mappingbased-properties-reified/mappingbased-properties-reified.md new file mode 100644 index 
0000000000000000000000000000000000000000..4877b244e2be9dffb31aa7e57055baa1e9e7f82d --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-properties-reified/mappingbased-properties-reified.md @@ -0,0 +1,7 @@ +# Wikidata Mappingbased Properties Reified +Mappingbased property triples given an identifier and split into 4 separate triples so additional data can be added. + +Uses RDF reification of everything in the other two mappingbased artifacts. +Qualifiers adds the qualifiers from wikidata to the reification. + + diff --git a/databus-poms/dbpedia/wikidata/mappingbased-properties-reified/pom.xml b/databus-poms/dbpedia/wikidata/mappingbased-properties-reified/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..d0270b78fb20ab706771b2308a2de626224206ff --- /dev/null +++ b/databus-poms/dbpedia/wikidata/mappingbased-properties-reified/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.08.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>mappingbased-properties-reified</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/ontology-subclassof/ontology-subclassof.md b/databus-poms/dbpedia/wikidata/ontology-subclassof/ontology-subclassof.md new file mode 100644 index 0000000000000000000000000000000000000000..30e8e780e642efa770706327b349da86cef4e477 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/ontology-subclassof/ontology-subclassof.md @@ -0,0 +1,4 @@ +# Wikidata Ontology SubclassOf +Ontology Parent Classes + +Captures all subclasses of wikidata based on P279 in advance of the extraction. 
Mainly, used internally to improve instance types. diff --git a/databus-poms/dbpedia/wikidata/ontology-subclassof/pom.xml b/databus-poms/dbpedia/wikidata/ontology-subclassof/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..84c7716f0547e1042d38acb3907e1f0c8a2ec53a --- /dev/null +++ b/databus-poms/dbpedia/wikidata/ontology-subclassof/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>ontology-subclassof</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/page/page.md b/databus-poms/dbpedia/wikidata/page/page.md new file mode 100644 index 0000000000000000000000000000000000000000..44c6adb44861469b779926f2784d4d47440b6272 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/page/page.md @@ -0,0 +1,4 @@ +# Page Length and Ids +Numbers of characters contained in a Wikidata article's source and ids + +Numbers of characters contained in a Wikidata article's source and ids diff --git a/databus-poms/dbpedia/wikidata/page/pom.xml b/databus-poms/dbpedia/wikidata/page/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..0d65cb853c78bd83cd5f47d002ed5c5b7714661c --- /dev/null +++ b/databus-poms/dbpedia/wikidata/page/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + 
<version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>page</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/pom.xml b/databus-poms/dbpedia/wikidata/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..0621313e603f8f219412a1821817fbd5279db856 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/pom.xml @@ -0,0 +1,204 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <!-- the super-pom deactivates software compilation and configures the plugin to run in default phases --> + <parent> + <groupId>org.dbpedia.databus</groupId> + <artifactId>super-pom</artifactId> + <version>1.3-SNAPSHOT</version> + </parent> + <groupId>wikidata</groupId> + <artifactId>group-metadata</artifactId> + <packaging>pom</packaging> + <version>2019.10.01</version> + + <modules> + <module>alias</module> + <module>description</module> + <module>geo-coordinates</module> + <module>images</module> + <module>instance-types</module> + <module>labels</module> + <module>mappingbased-literals</module> + <module>mappingbased-objects</module> + <module>mappingbased-objects-uncleaned</module> + <!--module>mappingbased-properties-reified</module--> + <module>ontology-subclassof</module> + <module>page</module> + <module>properties</module> + <!--module>raw</module--> + <module>redirects</module> + <module>references</module> + <module>revision</module> + <module>sameas-all-wikis</module> + <module>sameas-external</module> + <!--module>sameas-wikidata</module--> + <module>debug</module> + </modules> + + <!-- needed 511 minutes for 110GB --> + + <properties> + <databus.tryVersionAsIssuedDate>true</databus.tryVersionAsIssuedDate> + 
<databus.packageDirectory> + /media/bigone/25TB/www/downloads.dbpedia.org/repo/lts/${project.groupId}/${project.artifactId} + </databus.packageDirectory> + <databus.downloadUrlPath> + https://downloads.dbpedia.org/repo/lts/${project.groupId}/${project.artifactId}/${project.version}/ + </databus.downloadUrlPath> + <databus.publisher>https://webid.dbpedia.org/webid.ttl#this</databus.publisher> + <!-- moved to settings.xml + databus.pkcs12File>${user.home}/.m2/certificate_generic.pfx</databus.pkcs12File--> + <databus.maintainer>https://alismayilov.github.io/webid-ali.ttl#this</databus.maintainer> + <databus.license>http://purl.oclc.org/NET/rdflicense/cc-by3.0</databus.license> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <databus.documentation><![CDATA[ +## Origin +This dataset was extracted using the XML wikidata-dumps available on https://www.wikidata.org/wiki/Wikidata:Database_download +The extraction is performed using the DBpedia Extraction-Framework available at https://github.com/dbpedia/extraction-framework +You may find more details on the Wikidata extractions at "Wikidata through the eyes of DBpedia" by Ali Ismayilov, Dimitris Kontokostas, Sören Auer, Jens Lehmann and Sebastian Hellmann. 
+You may download the article from http://www.semantic-web-journal.net/content/wikidata-through-eyes-dbpedia-1 + +## How to contribute +DBpedia is a community project, help us with this dataset: +* Extend and/or improve the Wikidata class & ontology mappings at the DBpedia mappings wiki (http://mappings.dbpedia.org) +* Extend and/or improve the Wikidata json mappings at https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json +* Improve this documentation in the pom.xml at https://github.com/dbpedia/databus-maven-plugin/tree/master/dbpedia/wikidata/pom.xml +* Improve the software at: https://github.com/dbpedia/extraction-framework + +## Known Issues +* RDF parsing and sorting failed for artifacts redirects, references, raw and sameas* +* we are attempting to switch from rapper to Sansa-Stack: http://sansa-stack.net/sansa-parser-performance-improved/ + +## Group Changelog +### 2018.07.20 to 2018.11.30 +* were created as new modular releases, some issues remain +* removed wikidata-sameas +* removed raw for now +* mappingbased objects are not cleaned +* we used rapper 2.0.14 to parse and `LC_ALL=C sort` to sort and ascii2uni -a U to unescape unicode +characters +* link to Wikimedia dump version is missing +## 2018.10.01 +* deleted due to bzip2 non-recoverable bad blocks + ]]></databus.documentation> + <!-- used for derive plugin --> + <databus.marvinversion>2019.10.01</databus.marvinversion> + </properties> + + <!-- currently still needed to find the super-pom, once the super-pom is in maven central, + this can be removed as well --> + <repositories> + <repository> + <id>archiva.internal</id> + <name>Internal Release Repository</name> + <url>http://databus.dbpedia.org:8081/repository/internal</url> + </repository> + <repository> + <id>archiva.snapshots</id> + <name>Internal Snapshot Repository</name> + <url>http://databus.dbpedia.org:8081/repository/snapshots</url> + <snapshots> + <updatePolicy>always</updatePolicy> + 
</snapshots> + </repository> + </repositories> + <build> + <plugins> + <plugin> + <groupId>org.dbpedia.databus</groupId> + <artifactId>databus-derive-maven-plugin</artifactId> + <version>1.0-SNAPSHOT</version> + <executions> + <execution> + <id>DeriveFromMarvin</id> + <goals> + <goal>clone</goal> + </goals> + </execution> + </executions> + <configuration> + <!-- + + PREFIX dataid: <http://dataid.dbpedia.org/ns/core#> +PREFIX dct: <http://purl.org/dc/terms/> +PREFIX dcat: <http://www.w3.org/ns/dcat#> +PREFIX db: <https://databus.dbpedia.org/> +PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix dataid-cv: <http://dataid.dbpedia.org/ns/cv#> + + +SELECT distinct ?versionstring WHERE { + ?dataset dataid:group <https://databus.dbpedia.org/marvin/wikidata> . + ?dataset dataid:version ?version . + ?dataset dct:hasVersion "2019.08.01"^^xsd:string + BIND (concat("<version>",REPLACE (str(?version),"2019.08.01","${databus.marvinversion}"),"</version>" ) AS ?versionstring ) +} +order by asc(?versionstring) +--> + <versions> + <version>https://databus.dbpedia.org/marvin/wikidata/alias/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/debug/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/description/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/geo-coordinates/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/images/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/instance-types/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/labels/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/mappingbased-literals/${databus.marvinversion}</version> + 
<version>https://databus.dbpedia.org/marvin/wikidata/mappingbased-objects/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/mappingbased-objects-uncleaned/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/ontology-subclassof/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/page/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/properties/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/redirects/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/references/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/revision/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/sameas-all-wikis/${databus.marvinversion}</version> + <version>https://databus.dbpedia.org/marvin/wikidata/sameas-external/${databus.marvinversion}</version> + </versions> + <skipParsing>false</skipParsing> + <skipCloning>false</skipCloning> + </configuration> + </plugin> + </plugins> + <extensions> + <extension> + <groupId>org.apache.maven.wagon</groupId> + <artifactId>wagon-webdav-jackrabbit</artifactId> + <version>3.0.0</version> + </extension> + </extensions> + </build> + + <profiles> + <profile> + <id>webdav</id> + <build> + <plugins> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>wagon-maven-plugin</artifactId> + <version>2.0.0</version> + <executions> + <execution> + <id>upload-databus</id> + <phase>install</phase> + <goals> + <goal>upload</goal> + </goals> + <configuration> + <fromDir>${project.build.directory}/databus/repo/${project.groupId}/${project.artifactId}</fromDir> + <url>dav:https://downloads.dbpedia.org/repo/</url> + <toDir>dbpedia/${project.groupId}/${project.artifactId}</toDir> + <serverId>downloads-dbpedia-org</serverId> + </configuration> + 
</execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> +</project> diff --git a/databus-poms/dbpedia/wikidata/properties/pom.xml b/databus-poms/dbpedia/wikidata/properties/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..964dd33ecd5ae8e8f6e40b3d803a484177510f5c --- /dev/null +++ b/databus-poms/dbpedia/wikidata/properties/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>properties</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/properties/properties.md b/databus-poms/dbpedia/wikidata/properties/properties.md new file mode 100644 index 0000000000000000000000000000000000000000..ec6d40b7fecbb52c9dbb6343cff7ee61771e384e --- /dev/null +++ b/databus-poms/dbpedia/wikidata/properties/properties.md @@ -0,0 +1,10 @@ +# Wikidata properties with labels +Wikidata property URIs as subject and dbo:alias to their labels + +A dump of all Wikidata properties. Some of them were used to generate mappings in R2R, e.g. +``` +<http://www.wikidata.org/entity/P957> <http://www.wikidata.org/entity/P1630> <https://www.wikidata.org/wiki/Special:BookSources/$1> . 
+``` + +## Issue +* The R2R mapping derived from this dataset, would need to be updated, but the update is manual atm, automation possible diff --git a/databus-poms/dbpedia/wikidata/redirects/pom.xml b/databus-poms/dbpedia/wikidata/redirects/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..99699d02933f96417a3732e66920a0e470ec7c28 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/redirects/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>redirects</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/redirects/redirects.md b/databus-poms/dbpedia/wikidata/redirects/redirects.md new file mode 100644 index 0000000000000000000000000000000000000000..ae8c1fdc7f585dcbcfd201fde69811d702e21d4a --- /dev/null +++ b/databus-poms/dbpedia/wikidata/redirects/redirects.md @@ -0,0 +1,5 @@ +# Redirects +Dataset containing redirects between articles in Wikidata. 
+ +The extractor responsible for this dataset is: +https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/scala/org/dbpedia/extraction/mappings/RedirectExtractor.scala diff --git a/databus-poms/dbpedia/wikidata/references/pom.xml b/databus-poms/dbpedia/wikidata/references/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..b9cef4b95814fccb516af959cf43e31f1ae7fdbf --- /dev/null +++ b/databus-poms/dbpedia/wikidata/references/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>references</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/references/references.md b/databus-poms/dbpedia/wikidata/references/references.md new file mode 100644 index 0000000000000000000000000000000000000000..f95d51bf8e71a4199e712411c59794f9c5b8f78c --- /dev/null +++ b/databus-poms/dbpedia/wikidata/references/references.md @@ -0,0 +1,4 @@ +# Wikidata References +Reified statements about references + +TODO diff --git a/databus-poms/dbpedia/wikidata/revision/pom.xml b/databus-poms/dbpedia/wikidata/revision/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..839d5647b6d8eaed71456aa4e5de12de9f9dc857 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/revision/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + 
<artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>revision</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/revision/revision.md b/databus-poms/dbpedia/wikidata/revision/revision.md new file mode 100644 index 0000000000000000000000000000000000000000..e9362dbed2632b7a7a06482cf7600075de756096 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/revision/revision.md @@ -0,0 +1,4 @@ +# Wikidata Revision +The revision information of the Wikidata article the information was extracted from. + + diff --git a/databus-poms/dbpedia/wikidata/sameas-all-wikis/pom.xml b/databus-poms/dbpedia/wikidata/sameas-all-wikis/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..b2b451d73863ac359dde00b9c00abbf09a23c9e5 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/sameas-all-wikis/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>sameas-all-wikis</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/sameas-all-wikis/sameas-all-wikis.md b/databus-poms/dbpedia/wikidata/sameas-all-wikis/sameas-all-wikis.md new file mode 100644 index 0000000000000000000000000000000000000000..2aeadafbc41b10cea267e9d4f85562fd4cd61805 --- /dev/null +++ b/databus-poms/dbpedia/wikidata/sameas-all-wikis/sameas-all-wikis.md @@ -0,0 +1,4 @@ +# Wikidata SameAs All Wikis +Sameas for all wikis + +Contains the interlanguage links diff --git 
a/databus-poms/dbpedia/wikidata/sameas-external/pom.xml b/databus-poms/dbpedia/wikidata/sameas-external/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..d95005dfbec57e6e9d25cd4d393fd63fd5cb321d --- /dev/null +++ b/databus-poms/dbpedia/wikidata/sameas-external/pom.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>group-metadata</artifactId> + <groupId>wikidata</groupId> + <version>2019.10.01</version> + </parent> + <modelVersion>4.0.0</modelVersion> + <artifactId>sameas-external</artifactId> + <groupId>wikidata</groupId> + <packaging>jar</packaging> + +</project> diff --git a/databus-poms/dbpedia/wikidata/sameas-external/sameas-external.md b/databus-poms/dbpedia/wikidata/sameas-external/sameas-external.md new file mode 100644 index 0000000000000000000000000000000000000000..9b089f764a0e9ee8581aafa6c0b73bfd7438bbbc --- /dev/null +++ b/databus-poms/dbpedia/wikidata/sameas-external/sameas-external.md @@ -0,0 +1,5 @@ +# SameAs External +External sameas + +Contains links to external data, uses R2R mapping: +https://github.com/dbpedia/extraction-framework/blob/master/core/src/main/resources/wikidatar2r.json#L92