Commit 57ab85aa authored by vehnem's avatar vehnem
Browse files

completeness check

parent 1243216e
package org.dbpedia.release
import java.text.SimpleDateFormat
import java.util.Calendar
import org.apache.jena.query.{QueryException, QueryExecutionFactory}
import org.dbpedia.release.config.Config
import org.dbpedia.release.config.Config.versions
import org.dbpedia.release.handler.{CompletenessHandler, ReleaseLogHandler}
import org.dbpedia.release.model.VersionStatus
import org.json4s.{DefaultFormats, Formats}
import org.scalatra._
import org.scalatra.json.JacksonJsonSupport
import org.slf4j.LoggerFactory
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
// Swagger support
......@@ -34,16 +40,34 @@ class DataApiServlet(implicit val swagger: Swagger)
// GET /release/versions
// Runs Config.versions.query against the Databus SPARQL endpoint and returns one
// VersionStatus per ?version binding, each with state 2. When the current month's
// "<year>.<month>.01" version is newer than everything returned (or nothing was
// returned), it is prepended with state 0.
// NOTE(review): the meaning of the numeric state codes is not defined in this block;
// the dashboard script appears to treat 0 as "not yet released" - confirm.
get("/release/versions") {
  val versions = new ListBuffer[VersionStatus]()
  val qexec = QueryExecutionFactory
    .sparqlService(
      "http://databus.dbpedia.org/repo/sparql",
      Config.versions.query
    )
  // Release the remote query execution even if the endpoint fails while iterating.
  try {
    qexec.execSelect()
      .forEachRemaining(row => {
        versions.append(VersionStatus(row.get("version").asLiteral().getLexicalForm, 2))
      })
  } finally {
    qexec.close()
  }
  val latestPossible = new SimpleDateFormat("y.MM").format(Calendar.getInstance().getTime) + ".01"
  // `max` throws on an empty collection, so an empty result is handled explicitly:
  // with nothing on the bus, the current month is always reported as pending.
  // Versions are compared as plain strings, as before.
  if (versions.isEmpty || versions.map(_.version).max < latestPossible)
    VersionStatus(latestPossible, 0) :: versions.toList
  else
    versions.toList
}
// GET /release/logs/:group/:version
// Looks up the log files of a release. Dots in the version path parameter are
// replaced by dashes before it is handed to ReleaseLogHandler.getLogFiles.
// Returns the log files as a list, or an empty list when the handler yields None.
get("/release/logs/:group/:version") {
  val group = params("group")
  val version = params("version").replace(".", "-")
  ReleaseLogHandler.getLogFiles(group, version).map(_.toList).getOrElse(List())
}
......@@ -58,7 +82,10 @@ class DataApiServlet(implicit val swagger: Swagger)
val group = params("group")
val version = params("version")
CompletenessHandler.getStatus(group,version)
CompletenessHandler.getStatus(group, version) match {
case Some(success) => success
case _ => List()
}
}
private val getVersions =
......
package org.dbpedia.release.config
import java.net.URL
import java.text.SimpleDateFormat
import java.util.Calendar
import org.apache.jena.query.QueryExecutionFactory
import scala.collection.mutable.ListBuffer
/**
* Dashboard config
*/
object Config {
/**
 * Query used to list release versions.
 */
object versions {
/**
 * SPARQL SELECT returning the distinct dct:hasVersion values of all
 * dataid:Dataset resources whose dataid:account is
 * <https://databus.dbpedia.org/marvin>, ordered descending by ?version.
 */
val query : String =
"""PREFIX dataid: <http://dataid.dbpedia.org/ns/core#>
|PREFIX dct: <http://purl.org/dc/terms/>
|
|SELECT DISTINCT ?version WHERE {
| VALUES ?publisher { <https://databus.dbpedia.org/marvin> }
| ?s a dataid:Dataset.
| ?s dataid:account ?publisher .
| ?s dct:hasVersion ?version .
|
|} ORDER BY DESC(?version)
|""".stripMargin
}
object completeness {
object query {
/**
 * Builds the completeness SPARQL query for the marvin `mappings` artifacts.
 *
 * For every artifact listed in the VALUES clause the query returns the expected
 * file count, the number of distinct distributions found for the given version,
 * and their difference (?delta).
 *
 * @param version version string compared against str(dct:hasVersion)
 * @return the query text
 */
def mappings(version: String): String = {
  // The version ends up inside a single-quoted SPARQL literal. Escape backslashes
  // and single quotes so a crafted value cannot terminate the literal early.
  val safeVersion = version.replace("\\", "\\\\").replace("'", "\\'")
  s"""PREFIX dataid: <http://dataid.dbpedia.org/ns/core#>
     |PREFIX dct: <http://purl.org/dc/terms/>
     |PREFIX dcat: <http://www.w3.org/ns/dcat#>
     |
     |SELECT ?expected_files ?actual_files ?delta ?artifact {
     | {SELECT ?expected_files (COUNT(DISTINCT ?distribution) as ?actual_files) ((?actual_files-?expected_files)AS ?delta) ?artifact {
     | VALUES (?artifact ?expected_files) {
     |( <https://databus.dbpedia.org/marvin/mappings/geo-coordinates-mappingbased> 29 )
     |( <https://databus.dbpedia.org/marvin/mappings/instance-types> 80 )
     |( <https://databus.dbpedia.org/marvin/mappings/mappingbased-literals> 40 )
     |( <https://databus.dbpedia.org/marvin/mappings/mappingbased-objects> 120 )
     |( <https://databus.dbpedia.org/marvin/mappings/mappingbased-objects-uncleaned> 40 )
     |( <https://databus.dbpedia.org/marvin/mappings/specific-mappingbased-properties> 40 )
     | }
     | ?dataset dataid:artifact ?artifact .
     | ?dataset dct:hasVersion ?versionString .
     | ?dataset dcat:distribution ?distribution .
     | FILTER(str(?versionString) = '$safeVersion')
     | } GROUP BY ?artifact ?expected_files ?actual_files }
     | #FILTER(?delta != 0)
     |}
     |""".stripMargin
}
/**
 * Builds the completeness SPARQL query for the dbpedia `generic` artifacts.
 *
 * For every artifact listed in the VALUES clause the query returns the expected
 * file count, the number of distinct distributions found for the given version,
 * and their difference (?delta).
 *
 * @param version version string compared against str(dct:hasVersion)
 * @return the query text
 */
def generic(version: String): String = {
  // The version ends up inside a single-quoted SPARQL literal. Escape backslashes
  // and single quotes so a crafted value cannot terminate the literal early.
  val safeVersion = version.replace("\\", "\\\\").replace("'", "\\'")
  s"""PREFIX dataid: <http://dataid.dbpedia.org/ns/core#>
     |PREFIX dct: <http://purl.org/dc/terms/>
     |PREFIX dcat: <http://www.w3.org/ns/dcat#>
     |
     |SELECT ?expected_files ?actual_files ?delta ?artifact {
     | {SELECT ?expected_files (COUNT(DISTINCT ?distribution) as ?actual_files) ((?actual_files-?expected_files)AS ?delta) ?artifact {
     | VALUES (?artifact ?expected_files) {
     |( <https://databus.dbpedia.org/dbpedia/generic/anchor-text> 1 )
     |( <https://databus.dbpedia.org/dbpedia/generic/article-templates> 278 )
     |( <https://databus.dbpedia.org/dbpedia/generic/categories> 417 )
     |( <https://databus.dbpedia.org/dbpedia/generic/citations> 2 )
     |( <https://databus.dbpedia.org/dbpedia/generic/commons-sameas-links> 7 )
     |( <https://databus.dbpedia.org/dbpedia/generic/disambiguations> 15 )
     |( <https://databus.dbpedia.org/dbpedia/generic/external-links> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/geo-coordinates> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/homepages> 13 )
     |( <https://databus.dbpedia.org/dbpedia/generic/infobox-properties> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/infobox-property-definitions> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/interlanguage-links> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/labels> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/page> 278 )
     |( <https://databus.dbpedia.org/dbpedia/generic/persondata> 4 )
     |( <https://databus.dbpedia.org/dbpedia/generic/redirects> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/revisions> 278 )
     |( <https://databus.dbpedia.org/dbpedia/generic/topical-concepts>11 )
     |( <https://databus.dbpedia.org/dbpedia/generic/wikilinks> 139 )
     |( <https://databus.dbpedia.org/dbpedia/generic/wikipedia-links> 139 )
     | }
     | ?dataset dataid:artifact ?artifact .
     | ?dataset dct:hasVersion ?versionString .
     | ?dataset dcat:distribution ?distribution .
     | FILTER(str(?versionString) = '$safeVersion')
     | } GROUP BY ?artifact ?expected_files ?actual_files }
     | #FILTER(?delta != 0)
     |}
     |""".stripMargin
}
def wikidata(version: String): String =
......@@ -29,31 +103,31 @@ object Config {
|SELECT ?expected_files ?actual_files ?delta ?artifact {
| {SELECT ?expected_files (COUNT(DISTINCT ?distribution) as ?actual_files) ((?actual_files-?expected_files)AS ?delta) ?artifact {
| VALUES (?artifact ?expected_files) {
|( <https://databus.dbpedia.org/dbpedia/wikidata/alias> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/debug> 3 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/description> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/geo-coordinates> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/images> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/instance-types> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/labels> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/mappingbased-literals> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/mappingbased-objects-uncleaned> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/mappingbased-properties-reified> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/ontology-subclassof> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/page> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/properties> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/redirects> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/references> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/revision> 2 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/sameas-all-wikis> 1 )
|( <https://databus.dbpedia.org/dbpedia/wikidata/sameas-external> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/alias> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/debug> 3 )
|( <https://databus.dbpedia.org/marvin/wikidata/description> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/geo-coordinates> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/images> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/instance-types> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/labels> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/mappingbased-literals> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/mappingbased-objects-uncleaned> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/mappingbased-properties-reified> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/ontology-subclassof> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/page> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/properties> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/redirects> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/references> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/revision> 2 )
|( <https://databus.dbpedia.org/marvin/wikidata/sameas-all-wikis> 1 )
|( <https://databus.dbpedia.org/marvin/wikidata/sameas-external> 1 )
| }
| ?dataset dataid:artifact ?artifact .
| ?dataset dct:hasVersion ?versionString .
| ?dataset dcat:distribution ?distribution .
| FILTER(str(?versionString) = '$version')
| } GROUP BY ?artifact ?expected_files ?actual_files }
| FILTER(?delta != 0)
| #FILTER(?delta != 0)
|}
|""".stripMargin
}
......
......@@ -2,7 +2,7 @@ package org.dbpedia.release.handler
import org.apache.jena.query.QueryExecutionFactory
import org.dbpedia.release.config.Config
import org.dbpedia.release.model.VersionStatus
import org.dbpedia.release.model.ArtifactStatus
import scala.collection.mutable.ListBuffer
......@@ -17,25 +17,27 @@ object CompletenessHandler {
}
}
/**
 * Fetches the completeness status of every artifact of a release group.
 *
 * The query obtained from `getQuery(group, version)` is run against the Databus
 * SPARQL endpoint; each result row becomes one ArtifactStatus built from the
 * ?expected_files and ?actual_files literals and the last path segment of the
 * ?artifact URI.
 *
 * @param group   release group name, passed through to getQuery and the results
 * @param version release version, passed through to getQuery and the results
 * @return the per-artifact status list, or None when getQuery yields no query
 *         or any exception is raised while querying or parsing the results
 */
def getStatus(group: String, version: String): Option[List[ArtifactStatus]] = {
  try {
    getQuery(group, version).map(query => {
      val exec = QueryExecutionFactory
        .sparqlService("https://databus.dbpedia.org/repo/sparql", query)
      // Close the execution in `finally` so a failing endpoint or an unparsable
      // row does not leak the underlying connection.
      try {
        val buffer = new ListBuffer[ArtifactStatus]
        exec.execSelect()
          .forEachRemaining(resRow => {
            val expectedFiles = resRow.get("?expected_files").asLiteral().getLexicalForm.toInt
            val actualFiles = resRow.get("?actual_files").asLiteral().getLexicalForm.toInt
            // Only the trailing path segment of the artifact URI is reported.
            val artifact = resRow.get("?artifact").asResource().getURI.split("/").last
            buffer.append(ArtifactStatus(group, artifact, version, expectedFiles, actualFiles))
          })
        buffer.toList
      } finally {
        exec.close()
      }
    })
  } catch {
    // Best effort: any failure is reported to the caller as "no status available".
    case _: Exception => None
  }
}
def main(args: Array[String]): Unit = {
getStatus("wikidata", "2020.03.01").foreach(_.foreach(x => println(x.artifact, x.actual, x.expected)))
......
......@@ -24,24 +24,22 @@ object ReleaseLogHandler {
def getLogFiles(group: String, version: String): Option[Array[LogFile]] = {
try {
getLogsUrl(group).map((baseUrl: URL) => {
val set =
new BufferedReader(new InputStreamReader(
new URL(baseUrl, version).openStream()
)).lines().flatMap({
case HrefPattern(fileName) =>
LogFile.apply(new URL(baseUrl, version + "/"), fileName).getOrElse() match {
case Some(logFile) => Stream.of(logFile)
case _ => Stream.of()
}
case _ => Stream.of()
}).toArray[LogFile](size => new Array[LogFile](size)).map(x => x.logName -> x).toMap
Config.extractionLogs.names.map(logName => {
if (set.contains(logName)) set(logName)
else LogFile("", logName, "WAIT")
})
val remote: Map[String, LogFile] = new BufferedReader(new InputStreamReader(
new URL(baseUrl, version).openStream()
)).lines().flatMap({
case HrefPattern(fileName) =>
println(fileName)
LogFile.apply(new URL(baseUrl, version + "/"), fileName) match {
case Some(logFile) => Stream.of(logFile)
case _ => Stream.of()
}
case _ => Stream.of()
}).toArray[LogFile](size => new Array[LogFile](size)).map(lf => {
lf.logName -> lf
}).toMap
Config.extractionLogs.names.map(logName => remote.getOrElse(logName, LogFile("", logName, "WAIT")))
})
} catch {
case _: Exception => None
......@@ -55,6 +53,6 @@ object ReleaseLogHandler {
val logFiles = getLogFiles(group, version)
logFiles.foreach(_.foreach(x => println(x.logName,x.state)))
logFiles.foreach(_.foreach(x => println(x.logName, x.state)))
}
}
package org.dbpedia.release.model
/**
 * Completeness record of a single artifact in a release.
 *
 * @param group    release group the artifact belongs to
 * @param artifact artifact name
 * @param version  release version the counts refer to
 * @param expected number of files expected for the artifact
 * @param actual   number of files actually found
 */
case class ArtifactStatus(group: String, artifact: String, version: String, expected : Int, actual: Int)
......@@ -18,7 +18,7 @@ object LogFile {
case _ => None
}
val state = if (fileName.endsWith(".log.bz2")) "DONE" else "RUN"
val state = if (fileName.endsWith(".log.bz2") || fileName.endsWith("/")) "DONE" else "RUN"
logNameOption.map(logName => new LogFile(new URL(baseUrl, fileName).toString, logName, state))
}
......
package org.dbpedia.release.model
/**
 * A release version together with a numeric state code.
 *
 * @param version version string, e.g. "2020.03.01"
 * @param state   numeric state code
 *                NOTE(review): the meaning of the codes is not defined here - confirm
 *                against the servlet and the dashboard script before relying on it.
 */
case class VersionStatus(version: String, state: Int)
......@@ -26,9 +26,10 @@
</a>
<h3 class="text-dark text-center">MARVIN Release BOT</h3>
<!-- version-list -->
<!-- data-url="data.json" data-toggle="table" -->
<div class="version-table">
<table data-toggle="table" data-search="true" data-pagination="true" data-url="data.json"
data-page-size="10" class="table">
<table data-search="true" data-pagination="true"
data-page-size="10" class="table" id="version-table">
<thead>
<th data-field="version" class="text-dark">Versions</th>
</thead>
......@@ -228,10 +229,10 @@
<div class="col-sm-6 p-3">
<div class="card">
<div class="card-body">
<h4>Medawiki Dump Download Completeness</h4>
<h4>Wikimedia Dumps</h4>
<div class="row">
<div id="mappings-downloads" class="col-sm-8" style="height: 250px;">
Download Chart
Dump Status...
</div>
<div class="col-sm-4">
<h6 class="text-danger">Missing files </h6>
......@@ -246,8 +247,7 @@
<div class="col-sm-6 p-3">
<div class="card">
<div class="card-body">
<h4>Downloadable Log Files</h4>
<h4>Log Files</h4>
<table id="mappings-logs" data-pagination="true"
data-page-size="4" class="table">
<thead>
......@@ -262,10 +262,27 @@
<div class="col-sm-6 p-3">
<div class="card">
<div class="card-body">
<h4>Completeness Check on Databus</h4>
<div id="">
Integration Query results
<h4>MARVIN Release Completeness</h4>
Artifact
<div class="progress">
<div id="mappings-completeness-artifacts" class="progress-bar progress-bar-striped font-weight-bold" style="width: 100%;">
0/0
</div>
</div>
Files
<div class="progress">
<div id="mappings-completeness-files" class="progress-bar progress-bar-striped font-weight-bold" style="width: 100%;">
0/0
</div>
</div>
<table id="mappings-completeness-table" data-pagination="true"
data-page-size="4" class="table">
<thead>
<th data-field="state">State</th>
<th data-field="artifact">Artifact</th>
<th data-field="missing">Missing</th>
</thead>
</table>
</div>
</div>
</div>
......@@ -310,10 +327,27 @@
<div class="col-sm-6 p-3">
<div class="card">
<div class="card-body">
<h4>Completeness Check on Databus</h4>
<p>
Integration Query results
</p>
<h4>MARVIN Release Completeness</h4>
Artifact
<div class="progress">
<div id="generic-completeness-artifacts" class="progress-bar progress-bar-striped font-weight-bold" style="width: 100%;">
0/0
</div>
</div>
Files
<div class="progress">
<div id="generic-completeness-files" class="progress-bar progress-bar-striped font-weight-bold" style="width: 100%;">
0/0
</div>
</div>
<table id="generic-completeness-table" data-pagination="true"
data-page-size="4" class="table">
<thead>
<th data-field="state">State</th>
<th data-field="artifact">Artifact</th>
<th data-field="missing">Missing</th>
</thead>
</table>
</div>
</div>
</div>
......@@ -358,10 +392,27 @@
<div class="col-sm-6 p-3">
<div class="card">
<div class="card-body">
<h4>Completeness Check on Databus</h4>
<p>
Integration Query results
</p>
<h4>MARVIN Release Completeness</h4>
Artifact
<div class="progress">
<div id="wikidata-completeness-artifacts" class="progress-bar progress-bar-striped font-weight-bold" style="width: 100%;">
0/0
</div>
</div>
Files
<div class="progress">
<div id="wikidata-completeness-files" class="progress-bar progress-bar-striped font-weight-bold" style="width: 100%;">
0/0
</div>
</div>
<table id="wikidata-completeness-table" data-pagination="true"
data-page-size="4" class="table">
<thead>
<th data-field="state">State</th>
<th data-field="artifact">Artifact</th>
<th data-field="missing">Missing</th>
</thead>
</table>
</div>
</div>
</div>
......
const api = "http://localhost:8080/api/"
/* anchor scroll offset */
// After an in-page anchor jump, log a marker and scroll back up by 80px.
// NOTE(review): the 80px offset presumably clears a fixed page header - confirm.
window.addEventListener("hashchange", () => {
    console.log("#");
    const left = window.scrollX;
    const top = window.scrollY - 80;
    window.scrollTo(left, top);
});
// var [publisher, group, version] = window.location.pathname.split("/").slice(1,4)
/** TOTAL **/
// TODO
/* versions */
/** RELEASE **/
/**
 * Returns the first day of the current local month formatted as "YYYY.MM.01".
 */
function latestDate() {
    const now = new Date();
    const year = String(now.getFullYear());
    const month = String(now.getMonth() + 1).padStart(2, '0');
    return [year, month, '01'].join('.');
}
/* version */
// Release version shown on the page: taken from the `version` query parameter,
// falling back to the first day of the current month when it is absent or empty.
const urlParams = new URLSearchParams(window.location.search)
var version = urlParams.get('version') || latestDate()
// The value comes straight from the URL, so write it as text rather than HTML
// to keep markup in the query string from being interpreted.
document.getElementById("version-text").textContent = version + " - Release"
// Fetch the known release versions and render them into the version table.
// Each row links to the release itself plus the mappings (m), generic (g) and
// wikidata (w) sections for that version; entries with state 0 are highlighted
// with the "text-danger" class, all others with "text-info".
$.getJSON(api + "release/versions", function (data) {
    const versions = data.map(function (element) {
        // Declared locally: the previous undeclared assignment created an implicit global.
        const textColor = element.state == 0 ? "text-danger" : "text-info"
        const href = '?version=' + element.version
        return {
            'version':
                '<a class="' + textColor + '" href="' + href + '#version">' + element.version + '</a> ' +
                '<a class="text-dark" href="' + href + '#mappings">m</a>|' +
                '<a class="text-dark" href="' + href + '#generic">g</a>|' +
                '<a class="text-dark" href="' + href + '#wikidata">w</a>'
        }
    })
    $('#version-table').bootstrapTable({ 'data': versions })
});
/* overall */
/* total */
/** mappings **/
/* overall */
/* mappings-download */
/* mappings-dumps */
google.charts.load('current', {'packages':['corechart']});
google.charts.load('current', { 'packages': ['corechart'] });
google.charts.setOnLoadCallback(drawChart);
function drawChart() {
var data = google.visualization.arrayToDataTable([
['State', 'Number'],
['Finished', 30],
['Waiting', 9],
['Mssing', 1]
['Finished', 32],
['Waiting', 5],
['Mssing', 3]
]);
var options = {
colors:['#28a745','#17a2b8','#dc3545'],
colors: ['#28a745', '#17a2b8', '#dc3545'],
pieHole: 0.5,
pieSliceText: 'value',
};
......@@ -45,61 +62,128 @@ function drawChart() {
chart.draw(data, options);
}
/* mappings logs */
/* mappings-logs */
// n/a - not loaded,
// unzipped - in progress,
// zipped - done (but success unclear)
var $table = $('#mappings-logs')
var mappingsLogsTable = $('#mappings-logs')
$(function() {
$(function () {
var data = [
{
'filename': '<a href="#">preprocess.log</a>',
'description': 'preprocessing (todo explain)',
'state' : '<span class="text-warning">RUN</span>'
'state': '<span class="text-warning">RUN</span>'
},
{
'filename': '<a href="#">extraction.mappings.log</a>',
'description': 'process of DIEF',
'state' : '<span class="text-success">DONE</span>'
'state': '<span class="text-success">DONE</span>'
},
{
'filename': '<a href="#">dump.log</a>',
'description': 'download of latest dumps',
'state' : '<span class="text-success">DONE</span>'
'state': '<span class="text-success">DONE</span>'
},
{
'filename': '<a href="#">ontology.log</a>',
'description': 'download of latest ontology',