MARVIN-config (dbpedia-assoc) · Commits

Commit 4146ba9d, authored 5 years ago by Your Name
Commit message: wikidata
Parent: 20d6c734
Showing 2 changed files with 163 additions and 0 deletions:

wikidata/crontab.bak                   +23 −0
wikidata/schedule/wikidata-release.sh  +140 −0
wikidata/crontab.bak (new file, mode 100644, +23 −0)
# Edit this file to introduce tasks to be run by cron.
#
# Each task to run has to be defined through a single line
# indicating with different fields when the task will be run
# and what command to run for the task
#
# To define the time you can provide concrete values for
# minute (m), hour (h), day of month (dom), month (mon),
# and day of week (dow) or use '*' in these fields (for 'any').
#
# Notice that tasks will be started based on the cron's system
# daemon's notion of time and timezones.
#
# Output of the crontab jobs (including errors) is sent through
# email to the user the crontab file belongs to (unless redirected).
#
# For example, you can run a backup of all your user accounts
# at 5 a.m. every week with:
# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
#
# For more information see the manual pages of crontab(5) and cron(8)
#
# m h dom mon dow command
0 0 7 * * /bin/bash -c '/home/extractor/schedule/wikidata-release.sh' >/dev/null 2>&1
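
For reference, the five time fields of the job above read: minute 0, hour 0, day of month 7, any month, any day of week, so the release script starts at 00:00 on the 7th of every month; >/dev/null 2>&1 discards stdout and stderr, which also suppresses the mail delivery described in the comments above. Broken down (annotation added here for illustration, not part of the committed file):

# m h dom mon dow command
# 0 0 7   *   *   /bin/bash -c '/home/extractor/schedule/wikidata-release.sh'
#                 → 00:00 on day 7 of every month, all output discarded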
wikidata/schedule/wikidata-release.sh (new file, mode 100755, +140 −0)
#!/bin/bash
# Wikidata DBpedia release script, version 1.0
set -e

TIME_DATE="2019-07-01" #$(date +%Y-%m-%d)
EXTRACT_DIR=/home/extractor/extraction-framework
MVN_LOGS=/data/extraction/logs/mvn
#DATA_DIR=`awk -F= '/base-dir/{print $NF}' $EXTRACT_DIR/core/src/main/resources/universal.properties | head -n1`
DATA_DIR=/data/extraction/wikidumps/
#WWW_DIR=/var/www/html/wikidata

function download-ontology(){
    cd $EXTRACT_DIR/core;
    ../run download-ontology;
}

function recompile(){
    cd $EXTRACT_DIR;
    mvn clean install;
}

function download-r2r-mapping(){
    cd $EXTRACT_DIR/core/src/main/resources &&
        curl https://raw.githubusercontent.com/dbpedia/extraction-framework/master/core/src/main/resources/wikidatar2r.json > wikidatar2r.json
}

function download-xml-dump(){
    cd $EXTRACT_DIR/dump;
    ../run download download.wikidata.properties \
        > $MVN_LOGS/$TIME_DATE-wikidata.download.out \
        2> $MVN_LOGS/$TIME_DATE-wikidata.download.err;
}

function raw-extractor(){
    cd $EXTRACT_DIR/dump;
    # Run only .WikidataRawExtractor
    ../run extraction extraction.wikidataraw.properties;
}

function subclassof-script(){
    cd $EXTRACT_DIR/scripts;
    ../run WikidataSubClassOf process.wikidata.subclassof.properties;
}

function all-other-extractors(){
    cd $EXTRACT_DIR/dump;
    # Run all other extractors
    ../run extraction extraction.wikidataexceptraw.properties
}

function all-extractors(){
    cd $EXTRACT_DIR/dump;
    # Run all extractors in a single pass
    ../run extraction extraction.wikidata.properties;
    # > $MVN_LOGS/$TIME_DATE-wikidata.extraction.out \
    # 2> $MVN_LOGS/$TIME_DATE-wikidata.extraction.err;
}

function post-processing(){
    cd $EXTRACT_DIR/scripts;
    ../run ResolveTransitiveLinks $DATA_DIR redirects transitive-redirects .ttl.bz2 wikidata
    ../run MapObjectUris $DATA_DIR transitive-redirects .ttl.bz2 mappingbased-objects-uncleaned,raw -redirected .ttl.bz2 wikidata
}

function type-consistency-check(){
    cd $EXTRACT_DIR/scripts;
    ../run TypeConsistencyCheck type.consistency.check.properties;
}

function sync-with-www(){
    rsync -avz $DATA_DIR/wikidatawiki/ $WWW_DIR/;
    # We don't need index.html
    find $WWW_DIR/ | grep index.html | xargs rm -rf;
}

function databus-preparation(){
    cd $DATA_DIR;
    bash ~/databusPrep.sh $WWW_DIR/ src/main/databus;
}

function delete-old-extractions(){
    # Delete extractions older than 1 month, i.e. keep 1-2 results in www.
    find $WWW_DIR/ -type d -ctime +20 | xargs rm -rf;
    # Remove everything in Dump dir, do we need to keep them?
    rm -rf $DATA_DIR/wikidatawiki/*;
}

function remove-date-from-files(){
    # Go to the last changed directory
    cd "$(\ls -1dt $WWW_DIR/*/ | head -n 1)";
    # Remove date (numbers) from files
    for i in *; do mv "$i" "`echo $i | sed 's/[0-9]..//g'`"; done;
}

function main() {
    #delete-old-extractions; # to have some space for new extraction
    # touch download.process;
    download-ontology;
    download-r2r-mapping;
    download-xml-dump;
    recompile;
    all-extractors;
    post-processing;
    type-consistency-check;

    cd /data/extraction/wikidumps;
    ./prep.sh;
    cd /data/extraction/databus-maven-plugin/dbpedia/wikidata;
    mvn package;
    mvn databus:deploy;

    #----
    # below not configured yet
    #----
    ## Result of subclassof-script is used in the next extraction.
    #subclassof-script;
    #databus-preparation;
    # Sync extraction with www
    #sync-with-www
    #remove-date-from-files

    # This was the previous extraction process. Now we don't need to run raw-extractor separately.
    # raw-extractor;
    # subclassof-script;
    # all-other-extractors;
    # post-processing;
}
main
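
A review note on the script rather than part of the commit: sync-with-www, databus-preparation, delete-old-extractions, and remove-date-from-files all expand $WWW_DIR, whose assignment (#WWW_DIR=/var/www/html/wikidata) is commented out at the top. As committed this is harmless, because those functions are only referenced inside the "below not configured yet" block, but re-enabling them with WWW_DIR unset would make commands like find $WWW_DIR/ -type d -ctime +20 | xargs rm -rf start from the filesystem root. A minimal guard, sketched here under that assumption and not present in the commit, would fail fast instead:

# Hypothetical guard (not in the committed script): abort when WWW_DIR is unset or empty
: "${WWW_DIR:?set WWW_DIR (e.g. /var/www/html/wikidata) before enabling the www sync steps}"

Bash's ${var:?message} expansion writes the message to stderr and exits a non-interactive script when the variable is unset or empty, so the find/rm lines would never run with an empty path.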