library(cleanNLP)

#' Annotate a text with cleanNLP, optionally using an on-disk cache
#'
#' The cache file lives at `data/annotations/<article.id>-<article.rev.id>.RDS`
#' relative to the current working directory. If a cached file exists but
#' cannot be read, a message is emitted and the annotation is recomputed.
#'
#' @param text Text handed to `cleanNLP::cnlp_annotate()` (called with
#'   `as_strings = TRUE`).
#' @param article.id Article identifier; used only to build the cache file
#'   name.
#' @param article.rev.id Revision identifier of the article; used only to
#'   build the cache file name.
#' @param use.cache If `TRUE`, return the cached annotation for this
#'   article/revision when a readable cache file exists.
#' @param write.cache If `TRUE`, save a freshly computed annotation to the
#'   cache file, creating the cache directory when necessary.
#' @return The annotation object: either the value read from the cache file or
#'   the result of `cleanNLP::cnlp_annotate()`.
#' @export
create_annotations <- function(text, article.id, article.rev.id,
                               use.cache = TRUE, write.cache = FALSE) {
  # The ids may arrive padded with whitespace (e.g. pre-formatted numbers),
  # which would end up inside the file name, so strip every space again.
  filename <- gsub(
    " ", "",
    paste0("data/annotations/", article.id, "-", article.rev.id, ".RDS"),
    fixed = TRUE
  )

  # Use the cached annotation of this article in this specific revision, if
  # there is one and it can be read.
  if (use.cache && file.exists(filename)) {
    # Wrap the value in a list so a failed read (NULL) can be told apart from
    # whatever value was actually stored in the cache file.
    cached <- tryCatch(
      list(value = readRDS(filename)),
      error = function(e) {
        message(
          "Cached data seems to be corrupted, redoing annotation. (",
          conditionMessage(e), ")"
        )
        NULL
      }
    )
    if (!is.null(cached)) {
      return(cached$value)
    }
    # Unreadable cache: fall through and annotate from scratch.
  }

  # NOTE(review): `as_strings` is forwarded unchanged from the original code;
  # confirm the installed cleanNLP version still accepts this argument.
  annotation <- cleanNLP::cnlp_annotate(text, as_strings = TRUE)

  # Write cache if desired
  if (write.cache) {
    cache_dir <- dirname(filename)
    if (!dir.exists(cache_dir)) {
      dir.create(cache_dir, recursive = TRUE)
    }
    saveRDS(annotation, filename)
  }

  # The tidyverse style guide reserves explicit return() for early exits, so
  # the result is simply the last expression.
  annotation
}