Newer
Older
# Load the articles via getData(), passing use.cache = FALSE and
# write.cache = TRUE, with the data directory given relative to the working
# directory.
# NOTE(review): getData() is defined elsewhere; presumably this skips any
# existing cache and writes a fresh one -- confirm against its definition.
articles <- getData(use.cache = FALSE, write.cache = TRUE, data.dir = "../../data/")
# Within this function, `article` is a vector representing a single row of the
# original data frame, so its fields are accessed by position:
# article[1] is the Title, article[2] the PageID, and so on.
# NOTE(review): the enclosing function header is not visible in this excerpt;
# article[4] is presumably the raw HTML of the article -- confirm.
cleaned.text <- cleanHtml(article[4])
# Annotation creation is currently disabled (left commented out).
# annotation <- createAnnotations(cleaned.text, article[2], article[3])
# The spouse count is computed from article[4], the same field passed to
# cleanHtml() above, not from cleaned.text.
no.spouses <- getNoOfSpouses(article[4])
# NOTE(review): the two lines below are the tail of a call whose opening is
# not visible in this excerpt (presumably a data.frame() construction).
NoSpouses = no.spouses,
stringsAsFactors = FALSE)
## The results are now in `results`.
## Format the results for rasa.
# Progress message printed before the rasa files are written.
cat("Writing rasa files to 'rasa/'...\n")