diff --git a/processing/bin/Master.R b/processing/bin/Master.R
index 8e338d5d65a3ada707fbabdd63d3e6f8b8a620fe..0f476101b3c5fffeff755cd0623afd3947df3141 100755
--- a/processing/bin/Master.R
+++ b/processing/bin/Master.R
@@ -15,7 +15,7 @@ cat("Starting data import...\n")
 project_root <- find_root(has_file("README.md"))
 data_dir <- paste(project_root, "data", sep = .Platform$file.sep)
 
-articles <- wikiproc:::getData(use.cache = TRUE, data.dir = data_dir)
+articles <- wikiproc::getData(use.cache = TRUE, data.dir = data_dir)
 
 ## Data processing
 
@@ -27,7 +27,7 @@ results <- pbapply(articles, 1, function(article) {
 
   ## Data cleaning
 
-  cleaned.text <- wikiproc:::cleanHtml(article[4])
+  cleaned.text <- wikiproc::cleanHtml(article[4])
 
   ## Data preprocessing/annotating
 
@@ -35,7 +35,7 @@ results <- pbapply(articles, 1, function(article) {
 
   ## Extract information from Text
 
-  no.spouses <- wikiproc:::getNoOfSpouses(article[4])
+  no.spouses <- wikiproc::getNoOfSpouses(article[4])
 
   ## Create Results
 
diff --git a/processing/wikiproc/DESCRIPTION b/processing/wikiproc/DESCRIPTION
index 987b1cd672d8c3ee616a5e0dbc1b74ca796756c5..3d5ab21b9c3fabe88bc0bbee3ca5bd55b8718574 100644
--- a/processing/wikiproc/DESCRIPTION
+++ b/processing/wikiproc/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: wikiproc
-Title: What the Package Does (one line, title case)
+Title: Collection of Data Processing Utility Functions
 Version: 0.0.0.9000
 Authors@R: c(
   person("David", "Fuhry", role = "aut"),
@@ -9,7 +9,7 @@ Author: David Fuhry [aut],
   Lukas Gehrke [aut],
   Lucas Schons [aut]
 Maintainer: David Fuhry <not.an@email.address.net>
-Description: What the package does (one paragraph).
+Description: Provides various functions needed to transform raw Wikipedia HTML into processable facts.
 Depends: R (>= 3.5.0)
 License: GPL-2
 Encoding: UTF-8
@@ -18,9 +18,9 @@ RoxygenNote: 6.1.1
 Imports:
   pbapply,
   rvest,
-  stringi,
   textclean,
   stringr,
+  stringi,
   data.table,
   xml2,
   WikipediR,
diff --git a/processing/wikiproc/NAMESPACE b/processing/wikiproc/NAMESPACE
index 6ae926839dd1829f1016a96f766d970ff184ad97..f5ebafbf596ac4f41f745b7c83baff9b7edd80d3 100644
--- a/processing/wikiproc/NAMESPACE
+++ b/processing/wikiproc/NAMESPACE
@@ -1,2 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
+export(cleanHtml)
+export(createAnnotations)
+export(getBirthdate)
+export(getBirthplace)
+export(getData)
+export(getNoOfSpouses)
diff --git a/processing/wikiproc/R/CleanHtml.R b/processing/wikiproc/R/CleanHtml.R
index 182e9c839e512b15475b51821304eafec72cf959..b78b74f8e8a0b0badfc313e729e39659236f6463 100644
--- a/processing/wikiproc/R/CleanHtml.R
+++ b/processing/wikiproc/R/CleanHtml.R
@@ -5,11 +5,13 @@
 library(rvest)
 library(stringi)
 library(textclean)
+library(xml2)
 
 #' Clean a html formatted wikipedia page.
 #' Nodes of interest from the DOM are extracted and then cleaned from all html
 #' tags and annotations.
 #'
+#' @export
 #' @param html Url linking to a wikipedia webpage or a html formatted document.
 #' @return Plaintext document containing only the maintext of the give wikipedia page.
 cleanHtml <- function(html) {
@@ -22,10 +24,10 @@ cleanHtml <- function(html) {
   #     - remove whitespace after newline
   #     - remove whitespace before punctuation
   #     - replace multiple newlines with single newline
-  result <- read_html(html) %>%
-    html_nodes(css="h3:nth-child(13) , h4 , p+ h3 , p") %>%
-    stri_flatten(collapse = " ") %>%
-    replace_html() %>%
+  result <- xml2::read_html(html) %>%
+    rvest::html_nodes(css = "h3:nth-child(13) , h4 , p+ h3 , p") %>%
+    stringi::stri_flatten(collapse = " ") %>%
+    textclean::replace_html() %>%
     gsub("\\[\\d*\\]", "", .) %>%
     gsub("  +", " ", .) %>%
     gsub("\n ", "\n", .) %>%
diff --git a/processing/wikiproc/R/GetBirthdate.R b/processing/wikiproc/R/GetBirthdate.R
index 42dbb69d5a774e2ae37c9015264e4d64cad83e56..dd980b57e69162967f842d62abcea3d508598fae 100644
--- a/processing/wikiproc/R/GetBirthdate.R
+++ b/processing/wikiproc/R/GetBirthdate.R
@@ -13,15 +13,16 @@ library(xml2)
 #' If there is no infobox, first paragraph of the article
 #' will be checked for birthdate
 #'
+#' @export
 #' @param article Article in HTML-format
 #' @return String birthdate as string|NULL
 getBirthdate <- function(article) {
-  
+
   if(grepl("vcard", article)) {
-  
+
     # Check if there is an infobox
     infoBox <- getInfoBox(article)
-    
+
     # Get the Born field
     birthdate <- infoBox[infoBox$Desc %like% "Born",]$Content
     # Remove everything except the birthdate:
@@ -29,24 +30,24 @@ getBirthdate <- function(article) {
     birthdate <- gsub("\\s*\\([^\\)]+\\)", "", birthdate)
     # - Remove everything starting with newline
     birthdate <- gsub("\\n.*$", "", birthdate)
-    
+
     return(birthdate)
-  
-  
+
+
   } else if(!getIntroduction(article) == "") {
-    
+
     # Check first paragraph
     introduction <- getIntroduction(article)
-    
+
     # Get birthdate inside of parentheses
-    birthdate <- str_extract_all(introduction, "\\([^()]+\\)")[[1]]
+    birthdate <- stringr::str_extract_all(introduction, "\\([^()]+\\)")[[1]]
     # Remove parentheses
     birthdate <- substring(birthdate, 2, nchar(birthdate)-1)
-    
+
     return(birthdate)
-    
+
   } else {
-    
+
     # Return Null if there is no birthdate
     return(NULL)
   }
@@ -55,27 +56,27 @@ getBirthdate <- function(article) {
 ### Converts info box to table
 getInfoBox <- function(article) {
   # Read page as html
-  page <- read_html(article)
-  
+  page <- xml2::read_html(article)
+
   # Extracting text from the html will erase all <br> tags,
   # This will replace them with line breaks
-  
-  xml_find_all(page, ".//br") %>%
-    xml_add_sibling("p", "\n")
-  
-  xml_find_all(page, ".//br") %>%
-    xml_remove()
-  
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_add_sibling("p", "\n")
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_remove()
+
   # Get the info box
   # Will throw an error if there isnt any, so that should be checked beforehand
-  
+
   table <- page %>%
-    html_nodes("table.vcard") %>%
-    html_table(fill = TRUE) %>%
+    rvest::html_nodes("table.vcard") %>%
+    rvest::html_table(fill = TRUE) %>%
     .[[1]]
-  
+
   colnames(table) <- c("Desc", "Content")
-  
+
   return(table)
 }
 
@@ -85,28 +86,28 @@ getInfoBox <- function(article) {
 #' @return string introduction text from wikipedia article
 getIntroduction <- function(article) {
   # Read page as html
-  page <- read_html(article)
-  
+  page <- xml2::read_html(article)
+
   # Extracting text from the html will erase all <br> tags,
   # This will replace them with line breaks
-  
-  xml_find_all(page, ".//br") %>%
-    xml_add_sibling("p", "\n")
-  
-  xml_find_all(page, ".//br") %>%
-    xml_remove
-  
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_add_sibling("p", "\n")
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_remove
+
   # Get all paragraphs
   paragraphs <- page %>%
-    html_nodes("p") %>%
-    html_text()
-  
+    rvest::html_nodes("p") %>%
+    rvest::html_text()
+
   # There will be some leading paragraphs containing only "\n"
   # Remove those leading paragraphs
   remove <- c("\n")
-  cleaned <- setdiff(paragraphs, remove)
+  cleaned <- base::setdiff(paragraphs, remove)
   introduction <- cleaned[1]
-  
+
   # Return first paragraph
   return(introduction)
 }
diff --git a/processing/wikiproc/R/GetBirthplace.R b/processing/wikiproc/R/GetBirthplace.R
index 8726598874e32feeffa2c015ec5bb43cfec90adc..652217f87e0a9e0348f2fd15f6a3a1d92b6244e5 100644
--- a/processing/wikiproc/R/GetBirthplace.R
+++ b/processing/wikiproc/R/GetBirthplace.R
@@ -10,28 +10,29 @@ library(data.table)
 
 #' This script extracts Birthplace from physicist texts
 #' Try to get the infobox and extract the birthplace
-#' If there is no infobox, 0 will be returned as 
+#' If there is no infobox, 0 will be returned as
 #' birthplace is hard to extract from text
-#' 
+#'
+#' @export
 #' @param article Article in HTML-format
 #' @return String with birthplace of the physicist|0
 getBirthplace <- function(article) {
-  
+
   # If there is no infobox we return 0
   if(!grepl("vcard", article)) {
       return(0)
   }
-  
+
   # Use infobox to get Birthplace
   infoBox <- getInfoBox(article)
-  
+
   # Get 'Born' field
   birthplace <- infoBox[infoBox$Desc %like% "Born",]$Content
-  
+
   # Remove everything in front of the "\n"
   # Rest is birthplace
   birthplace <- gsub(".*\\\n", "", birthplace)
-  
+
   # return birthplace
   return(birthplace)
 }
@@ -39,26 +40,26 @@ getBirthplace <- function(article) {
 ### Converts info box to table
 getInfoBox <- function(article) {
   # Read page as html
-  page <- read_html(article)
-  
+  page <- xml2::read_html(article)
+
   # Extracting text from the html will erase all <br> tags,
   # this will replace them with line breaks
-  
-  xml_find_all(page, ".//br") %>%
-    xml_add_sibling("p", "\n")
-  
-  xml_find_all(page, ".//br") %>%
-    xml_remove()
-  
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_add_sibling("p", "\n")
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_remove()
+
   # Get the info box
   # Will throw an error if there isnt any, so that should be checked beforehand
-  
+
   table <- page %>%
-    html_nodes("table.vcard") %>%
-    html_table(fill = TRUE) %>%
+    rvest::html_nodes("table.vcard") %>%
+    rvest::html_table(fill = TRUE) %>%
     .[[1]]
-  
+
   colnames(table) <- c("Desc", "Content")
-  
+
   return(table)
 }
diff --git a/processing/wikiproc/R/GetData.R b/processing/wikiproc/R/GetData.R
index ef8713e866678a1313db70927bc63216683f2d79..2300cb676ad71130d2b789bd97cf58803a513453 100644
--- a/processing/wikiproc/R/GetData.R
+++ b/processing/wikiproc/R/GetData.R
@@ -14,6 +14,7 @@ library(xml2)
 
 #' Retrieve wikipedia articles about physicists
 #'
+#' @export
 #' @param use.cache Use cached data if it exists over downloading new data
 #' @param write.cache Write downloaded results into cache for use on future calls
 #' @param data.dir Directory the data should be read from and/or written to
@@ -40,7 +41,7 @@ getData <- function(use.cache = TRUE, write.cache = FALSE, data.dir = "data") {
 
   cat("Downloading list from wikipedia... ")
 
-  page <- read_html("https://en.wikipedia.org/wiki/List_of_physicists")
+  page <- xml2::read_html("https://en.wikipedia.org/wiki/List_of_physicists")
 
   cat("Done.\n")
 
@@ -49,8 +50,8 @@ getData <- function(use.cache = TRUE, write.cache = FALSE, data.dir = "data") {
   cat("Processing data:\n")
 
   physicists <- page %>%
-    html_nodes(".mw-parser-output li a") %>%
-    html_attr("href")
+    rvest::html_nodes(".mw-parser-output li a") %>%
+    rvest::html_attr("href")
 
   # Clean the list
 
@@ -81,15 +82,15 @@ getData <- function(use.cache = TRUE, write.cache = FALSE, data.dir = "data") {
 
   # Call the wikipedia api for each entry in our list
 
-  articles <- pblapply(physicists, function(x) {
+  articles <- pbapply::pblapply(physicists, function(x) {
     res <- tryCatch({
-      article <- page_content("en", "wikipedia", page_name = x, as_wikitext = FALSE)
+      article <- WikipediR::page_content("en", "wikipedia", page_name = x, as_wikitext = FALSE)
       # Check if the article is a redirect page
       if (grepl(".redirectText", article$parse$text$`*`)) {
         # Get the real article name
-        pname <- read_html(article$parse$text$`*`) %>%
-          html_nodes(".redirectText a") %>%
-          html_attr("href")
+        pname <- xml2::read_html(article$parse$text$`*`) %>%
+          rvest::html_nodes(".redirectText a") %>%
+          rvest::html_attr("href")
 
         panme <- gsub("_", " ", pname)
 
@@ -101,7 +102,7 @@ getData <- function(use.cache = TRUE, write.cache = FALSE, data.dir = "data") {
         Encoding(tmp) <- "UTF-8"
         pname <- tmp
 
-        article <- page_content("en", "wikipedia", page_name = pname, as_wikitext = FALSE)
+        article <- WikipediR::page_content("en", "wikipedia", page_name = pname, as_wikitext = FALSE)
       }
       data.frame(Title = article$parse$title,
                  PageID = article$parse$pageid,
diff --git a/processing/wikiproc/R/GetNoOfSpouses.R b/processing/wikiproc/R/GetNoOfSpouses.R
index 5190edab023ba3063d2afe1bc4f67f85f7cf4e36..d5882ceff7127d73eb011724dd43791b2466d61c 100755
--- a/processing/wikiproc/R/GetNoOfSpouses.R
+++ b/processing/wikiproc/R/GetNoOfSpouses.R
@@ -9,25 +9,26 @@
 
 library(rvest)
 library(data.table)
+library(xml2)
 
 ### Get number of spouses
-
+#' @export
 getNoOfSpouses <- function(article) {
-  
+
   # If there is no infobox we assume there were no spouses
   if(!grepl("vcard", article)) {
     return(0)
   }
-  
+
   infoBox <- getInfoBox(article)
-  
+
   # Get the spouse field
   spouses <- infoBox[infoBox$Desc %like% "Spouse",]$Content
   # Remove everything in parentheses
   spouses <- gsub("\\s*\\([^\\)]+\\)", "", spouses)
   # Split the strings by newlines to get one spouse per line
-  spouses <- strsplit(spouses, "\n")
-  spouses <- unlist(spouses)
+  spouses <- base::strsplit(spouses, "\n")
+  spouses <- base::unlist(spouses)
   if(length(spouses) > 0) {
     return(length(spouses))
   }
@@ -37,26 +38,26 @@ getNoOfSpouses <- function(article) {
 ### Converts info box to table
 getInfoBox <- function(article) {
   # Read page as html
-  page <- read_html(article)
-  
+  page <- xml2::read_html(article)
+
   # Extracting text from the html will erase all <br> tags,
   # this will replace them with line breaks
-  
-  xml_find_all(page, ".//br") %>%
-    xml_add_sibling("p", "\n")
-  
-  xml_find_all(page, ".//br") %>%
-    xml_remove()
-  
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_add_sibling("p", "\n")
+
+  xml2::xml_find_all(page, ".//br") %>%
+    xml2::xml_remove()
+
   # Get the info box
   # Will throw an error if there isnt any, so that should be checked beforehand
-  
+
   table <- page %>%
-    html_nodes("table.vcard") %>%
-    html_table(fill = TRUE) %>%
+    rvest::html_nodes("table.vcard") %>%
+    rvest::html_table(fill = TRUE) %>%
     .[[1]]
-  
+
   colnames(table) <- c("Desc", "Content")
-  
+
   return(table)
 }
diff --git a/processing/wikiproc/R/createAnnotations.R b/processing/wikiproc/R/createAnnotations.R
index b9ca6ebea7029055fc484efb723beb4605a607c9..6deb6a538e7d39472bc5d25a00f93a227ebeac12 100644
--- a/processing/wikiproc/R/createAnnotations.R
+++ b/processing/wikiproc/R/createAnnotations.R
@@ -1,14 +1,15 @@
 library(cleanNLP)
 
+#' @export
 createAnnotations <- function(text, article.id, article.rev.id, use.cache = TRUE, write.cache = FALSE) {
-  
+
   # Generate filename, for some reason there paste0 will pad the article id with leading whitespaces
   # To prevent this we stip 'em again
-  
+
   filename <- gsub(" ", "", paste0("data/annotations/", article.id, "-", article.rev.id, ".RDS"), fixed = TRUE)
-  
+
   # Check if there is a cached version of the annotations for this article in this specific revision
-  
+
   if(use.cache & file.exists(filename)) {
     res <- tryCatch({
       data <- readRDS(filename)
@@ -18,11 +19,11 @@ createAnnotations <- function(text, article.id, article.rev.id, use.cache = TRUE
     })
     return(res)
   }
-  
-  annotation <- cnlp_annotate(text, as_strings = TRUE)
-  
+
+  annotation <- cleanNLP::cnlp_annotate(text, as_strings = TRUE)
+
   # Write cache if desired
-  
+
   if(write.cache) {
     if (!dir.exists("data")) {
       dir.create("data")
@@ -32,10 +33,10 @@ createAnnotations <- function(text, article.id, article.rev.id, use.cache = TRUE
     }
     saveRDS(annotation, filename)
   }
-  
+
   # Return data
   # On a side note: Should we do this? The tidyverse style guide discourages explicit returns.
   # But then again, it suggests snake case for variables...
-  
+
   return(annotation)
-}
\ No newline at end of file
+}
diff --git a/processing/wikiproc/man/cleanHtml.Rd b/processing/wikiproc/man/cleanHtml.Rd
index 56994f44d9eadcd9f8cb1fee71bd54d91e518629..7247852e83e59f5f6c7ba54df2f90692b7f98a9c 100644
--- a/processing/wikiproc/man/cleanHtml.Rd
+++ b/processing/wikiproc/man/cleanHtml.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/CleanHtml.R
 \name{cleanHtml}
 \alias{cleanHtml}
-\title{Clean a html formatted wikipedia page. 
+\title{Clean a html formatted wikipedia page.
 Nodes of interest from the DOM are extracted and then cleaned from all html
 tags and annotations.}
 \usage{
@@ -15,7 +15,7 @@ cleanHtml(html)
 Plaintext document containing only the maintext of the give wikipedia page.
 }
 \description{
-Clean a html formatted wikipedia page. 
+Clean a html formatted wikipedia page.
 Nodes of interest from the DOM are extracted and then cleaned from all html
 tags and annotations.
 }
diff --git a/processing/wikiproc/man/getBirthplace.Rd b/processing/wikiproc/man/getBirthplace.Rd
index 8dd69d7d49dcc12d33914ecdaed68baf1d0fa78c..b1b083083922001d92b76f97cfb38ec7ca0049bf 100644
--- a/processing/wikiproc/man/getBirthplace.Rd
+++ b/processing/wikiproc/man/getBirthplace.Rd
@@ -4,7 +4,7 @@
 \alias{getBirthplace}
 \title{This script extracts Birthplace from physicist texts
 Try to get the infobox and extract the birthplace
-If there is no infobox, 0 will be returned as 
+If there is no infobox, 0 will be returned as
 birthplace is hard to extract from text}
 \usage{
 getBirthplace(article)
@@ -18,6 +18,6 @@ String with birthplace of the physicist|0
 \description{
 This script extracts Birthplace from physicist texts
 Try to get the infobox and extract the birthplace
-If there is no infobox, 0 will be returned as 
+If there is no infobox, 0 will be returned as
 birthplace is hard to extract from text
 }
diff --git a/processing/wikiproc/man/getData.Rd b/processing/wikiproc/man/getData.Rd
index 13e362d15d94d684eb53fe4c6a8001bc4b89949e..ec865807ac52c5cc079ccda9998e01632f97e969 100644
--- a/processing/wikiproc/man/getData.Rd
+++ b/processing/wikiproc/man/getData.Rd
@@ -4,12 +4,14 @@
 \alias{getData}
 \title{Retrieve wikipedia articles about physicists}
 \usage{
-getData(use.cache = TRUE, write.cache = FALSE)
+getData(use.cache = TRUE, write.cache = FALSE, data.dir = "data")
 }
 \arguments{
 \item{use.cache}{Use cached data if it exists over downloading new data}
 
 \item{write.cache}{Write downloaded results into cache for use on future calls}
+
+\item{data.dir}{Directory the data should be read from and/or written to}
 }
 \value{
 data.frame containing the title, id, revisionID and html-formatted full text