Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
dbpedia-assoc
MARVIN-config
Commits
1243216e
Commit
1243216e
authored
Jul 01, 2020
by
vehnem
Browse files
api: completeness & logs
parent
38f7d54e
Changes
6
Hide whitespace changes
Inline
Side-by-side
dashboard/src/main/scala/org/dbpedia/release/config/Config.scala
0 → 100644
View file @
1243216e
package
org.dbpedia.release.config
import
java.net.URL
/**
 * Dashboard configuration: SPARQL completeness queries and the locations and
 * names of the extraction log files, grouped by release group
 * (`mappings`, `generic`, `wikidata`).
 */
object Config {

  /** Settings used by the completeness check. */
  object completeness {

    /** SPARQL query templates, one per release group. */
    object query {

      /**
       * Escapes `value` so it can be embedded in a single-quoted SPARQL string
       * literal without terminating the literal early.
       *
       * Backslash, single quote, line feed and carriage return are replaced by
       * their backslash escapes; every other character is passed through.
       */
      private def escapeLiteral(value: String): String =
        value.flatMap {
          case '\\' => "\\\\"
          case '\'' => "\\'"
          case '\n' => "\\n"
          case '\r' => "\\r"
          case other => other.toString
        }

      /**
       * Completeness query for the `mappings` group.
       *
       * Currently a placeholder: the template contains only blank lines and
       * `version` is not used.
       */
      def mappings(version: String): String =
        s"""
           |
           |""".stripMargin

      /**
       * Completeness query for the `generic` group.
       *
       * Currently a placeholder: the template contains only blank lines and
       * `version` is not used.
       */
      def generic(version: String): String =
        s"""
           |
           |""".stripMargin

      /**
       * Completeness query for the `wikidata` group.
       *
       * For every artifact listed in the `VALUES` clause the query counts the
       * distinct distributions published for `version` and compares the count
       * with the expected number of files; only artifacts whose delta is not
       * zero are returned.
       *
       * @param version version string compared against `dct:hasVersion`; it is
       *                escaped before being placed inside the quoted literal
       * @return the SPARQL query text
       */
      def wikidata(version: String): String =
        s"""PREFIX dataid: <http://dataid.dbpedia.org/ns/core#>
           |PREFIX dct: <http://purl.org/dc/terms/>
           |PREFIX dcat: <http://www.w3.org/ns/dcat#>
           |
           |SELECT ?expected_files ?actual_files ?delta ?artifact {
           | {SELECT ?expected_files (COUNT(DISTINCT ?distribution) as ?actual_files) ((?actual_files-?expected_files)AS ?delta) ?artifact {
           | VALUES (?artifact ?expected_files) {
           |( <https://databus.dbpedia.org/dbpedia/wikidata/alias> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/debug> 3 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/description> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/geo-coordinates> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/images> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/instance-types> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/labels> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/mappingbased-literals> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/mappingbased-objects-uncleaned> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/mappingbased-properties-reified> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/ontology-subclassof> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/page> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/properties> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/redirects> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/references> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/revision> 2 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/sameas-all-wikis> 1 )
           |( <https://databus.dbpedia.org/dbpedia/wikidata/sameas-external> 1 )
           | }
           | ?dataset dataid:artifact ?artifact .
           | ?dataset dct:hasVersion ?versionString .
           | ?dataset dcat:distribution ?distribution .
           | FILTER(str(?versionString) = '${escapeLiteral(version)}')
           | } GROUP BY ?artifact ?expected_files ?actual_files }
           | FILTER(?delta != 0)
           |}
           |""".stripMargin
    }
  }

  /** Locations and canonical names of the extraction log files. */
  object extractionLogs {

    /** Base URL of the log directory for each release group. */
    object baseUrl {
      val mappings = new URL("http://dbpedia-mappings.tib.eu/logs/")
      val generic = new URL("http://dbpedia-generic.tib.eu/logs/")
      val wikidata = new URL("http://dbpedia-wikidata.tib.eu/logs/")
    }

    /** Canonical name under which each kind of log is reported. */
    object name {
      val downloadMappings = "downloadMappings.log"
      val downloadOntology = "downloadOntology.log"
      val downloadWikidumps = "downloadWikidumps.log"
      val extraction = "extraction.log"
      val postProcess = "postProcess.log"
      val unredirected = "unRedirected/"
    }

    /** All canonical log names, in the order they are listed in `name`. */
    val names: Array[String] = Array(
      name.downloadMappings,
      name.downloadOntology,
      name.downloadWikidumps,
      name.extraction,
      name.postProcess,
      name.unredirected
    )
  }
}
dashboard/src/main/scala/org/dbpedia/release/handler/CompletenessHandler.scala
0 → 100644
View file @
1243216e
package
org.dbpedia.release.handler
import
org.apache.jena.query.QueryExecutionFactory
import
org.dbpedia.release.config.Config
import
org.dbpedia.release.model.VersionStatus
import
scala.collection.mutable.ListBuffer
/**
 * Checks the completeness of a release by comparing, per artifact, the number
 * of expected files with the number of files found via the Databus SPARQL
 * endpoint.
 */
object CompletenessHandler {

  /**
   * Resolves the completeness query for a release group.
   *
   * @return the query text, or `None` when the group is unknown or the
   *         configured query for it is blank
   */
  private def getQuery(group: String, version: String): Option[String] = {
    val configured: Option[String] = group match {
      case "mappings" => Some(Config.completeness.query.mappings(version))
      case "generic"  => Some(Config.completeness.query.generic(version))
      case "wikidata" => Some(Config.completeness.query.wikidata(version))
      case _          => None
    }
    // A blank template is not a SPARQL query; treat it as "no query available"
    // instead of handing it to the query parser.
    configured.filter(_.trim.nonEmpty)
  }

  /**
   * Runs the completeness query of `group` for `version`.
   *
   * @param group   release group (`mappings`, `generic` or `wikidata`)
   * @param version version string passed to the query
   * @return `None` when no query is available for `group`; otherwise one
   *         [[org.dbpedia.release.model.VersionStatus]] per result row
   */
  def getStatus(group: String, version: String): Option[Array[VersionStatus]] =
    getQuery(group, version).map { query =>
      val execution =
        QueryExecutionFactory.sparqlService("https://databus.dbpedia.org/repo/sparql", query)
      try {
        val statuses = new ListBuffer[VersionStatus]
        val results = execution.execSelect()
        while (results.hasNext) {
          val row = results.next()
          val expectedFiles = row.get("?expected_files").asLiteral().getLexicalForm.toInt
          val actualFiles = row.get("?actual_files").asLiteral().getLexicalForm.toInt
          // Only the last path segment of the artifact URI is reported.
          val artifact = row.get("?artifact").asResource().getURI.split("/").last
          statuses += VersionStatus(group, artifact, version, expectedFiles, actualFiles)
        }
        statuses.toArray
      } finally {
        // Release the HTTP resources held by the query execution.
        execution.close()
      }
    }

  /** Manual smoke test: prints artifact, actual and expected counts for one wikidata version. */
  def main(args: Array[String]): Unit = {
    getStatus("wikidata", "2020.03.01").foreach { statuses =>
      statuses.foreach(status => println((status.artifact, status.actual, status.expected)))
    }
  }
}
dashboard/src/main/scala/org/dbpedia/release/handler/InputDumpHandler.scala
0 → 100644
View file @
1243216e
package
org.dbpedia.release.handler
// Placeholder object: no members are defined yet.
object
InputDumpHandler
{
}
dashboard/src/main/scala/org/dbpedia/release/handler/ReleaseLogHandler.scala
0 → 100644
View file @
1243216e
package
org.dbpedia.release.handler
import
java.io.
{
BufferedReader
,
InputStreamReader
}
import
java.net.URL
import
java.util.stream.Stream
import
org.dbpedia.release.config.Config
import
org.dbpedia.release.model.LogFile
import
scala.util.matching.Regex
/**
 * Looks up the extraction log files published for a release group and version
 * by reading the HTML directory listing of the group's log server.
 */
object ReleaseLogHandler {

  /** Base URL of the log directory for `group`, or `None` for an unknown group. */
  private def getLogsUrl(group: String): Option[URL] = group match {
    case "mappings" => Some(Config.extractionLogs.baseUrl.mappings)
    case "generic"  => Some(Config.extractionLogs.baseUrl.generic)
    case "wikidata" => Some(Config.extractionLogs.baseUrl.wikidata)
    case _          => None
  }

  /** Matches a whole listing line and captures an href target that starts with a letter. */
  private val HrefPattern: Regex = ".*<a href=\"([a-zA-Z]\\S*)\">.*".r

  /** Reads all lines served at `listingUrl` and always closes the underlying stream. */
  private def readLines(listingUrl: URL): List[String] = {
    val reader = new BufferedReader(new InputStreamReader(listingUrl.openStream()))
    try Iterator.continually(reader.readLine()).takeWhile(_ != null).toList
    finally reader.close()
  }

  /**
   * Collects the state of every known log file of `group` for `version`.
   *
   * @param group   release group (`mappings`, `generic` or `wikidata`)
   * @param version name of the version directory below the group's log URL
   * @return `None` when the group is unknown or the listing could not be read;
   *         otherwise one entry per name in `Config.extractionLogs.names`, in
   *         that order. Logs that do not appear in the listing are returned
   *         with an empty URL and state `"WAIT"`.
   */
  def getLogFiles(group: String, version: String): Option[Array[LogFile]] = {
    try {
      getLogsUrl(group).map { baseUrl =>
        // Links in the listing are resolved relative to the version directory.
        val versionDir = new URL(baseUrl, version + "/")
        val found: Map[String, LogFile] =
          readLines(new URL(baseUrl, version))
            .flatMap {
              // LogFile.apply yields None for files that are not known logs.
              case HrefPattern(fileName) => LogFile(versionDir, fileName).toList
              case _                     => Nil
            }
            .map(logFile => logFile.logName -> logFile)
            .toMap

        Config.extractionLogs.names.map { logName =>
          found.getOrElse(logName, LogFile("", logName, "WAIT"))
        }
      }
    } catch {
      // Any non-fatal failure (e.g. a missing version directory or an
      // unreachable host) is reported as "no information".
      case scala.util.control.NonFatal(_) => None
    }
  }

  /** Manual smoke test: prints name and state of each wikidata log for one version. */
  def main(args: Array[String]): Unit = {
    val group = "wikidata"
    val version = "2020-05-01"
    val logFiles = getLogFiles(group, version)
    logFiles.foreach(_.foreach(logFile => println((logFile.logName, logFile.state))))
  }
}
dashboard/src/main/scala/org/dbpedia/release/model/LogFile.scala
0 → 100644
View file @
1243216e
package
org.dbpedia.release.model
import
java.net.URL
import
org.dbpedia.release.config.Config
/** Factory that classifies a file name from a log directory listing. */
object LogFile {

  /**
   * Builds a [[LogFile]] for `fileName` if it belongs to one of the known logs.
   *
   * The kind of log is chosen by the prefix of `fileName`; the state is
   * `"DONE"` when the name ends with `.log.bz2` and `"RUN"` otherwise.
   *
   * @param baseUrl  URL against which `fileName` is resolved
   * @param fileName file name as it appears in the listing
   * @return the classified log file, or `None` when no known prefix matches
   */
  def apply(baseUrl: URL, fileName: String): Option[LogFile] = {
    val logNames = Config.extractionLogs.name

    // File-name prefix -> canonical log name; the first matching entry wins.
    // NOTE(review): the prefix "unredirected" is all lowercase, while the
    // canonical name in Config is "unRedirected/" - confirm which spelling the
    // server listing actually uses.
    val prefixes: List[(String, String)] = List(
      "downloadMappings"  -> logNames.downloadMappings,
      "downloadOntology"  -> logNames.downloadOntology,
      "downloadWikidumps" -> logNames.downloadWikidumps,
      "extraction"        -> logNames.extraction,
      "postProcessing"    -> logNames.postProcess,
      "unredirected"      -> logNames.unredirected
    )

    val matchedName: Option[String] = prefixes.collectFirst {
      case (prefix, canonical) if fileName.startsWith(prefix) => canonical
    }

    val finished = fileName.endsWith(".log.bz2")
    val currentState = if (finished) "DONE" else "RUN"

    matchedName.map { canonical =>
      new LogFile(new URL(baseUrl, fileName).toString, canonical, currentState)
    }
  }
}
/**
 * One extraction log as shown on the dashboard.
 *
 * @param url     absolute URL of the log, or an empty string when none was found
 * @param logName canonical name of the log
 * @param state   processing state; the values used in this code base are
 *                `"DONE"`, `"RUN"` and `"WAIT"`
 */
case class LogFile(
  url: String,
  logName: String,
  state: String
)
dashboard/src/main/scala/org/dbpedia/release/model/VersionStatus.scala
0 → 100644
View file @
1243216e
package
org.dbpedia.release.model
/**
 * Completeness record for one artifact of a release version.
 *
 * @param group    release group the artifact belongs to
 * @param artifact artifact name
 * @param version  version string that was checked
 * @param expected number of files expected for the artifact
 * @param actual   number of files actually found
 */
case class VersionStatus(
  group: String,
  artifact: String,
  version: String,
  expected: Int,
  actual: Int
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment