Skip to content
Snippets Groups Projects
create_annotations.R 1.19 KiB
Newer Older
David Fuhry's avatar
David Fuhry committed
library(cleanNLP)

Lucas Schons's avatar
Lucas Schons committed
create_annotations <- function(text, article.id, article.rev.id, use.cache = TRUE, write.cache = FALSE) {
David Fuhry's avatar
David Fuhry committed
  # Generate the cache filename. For some reason the pasted article id can end up padded with
  # leading whitespace, so we strip all spaces from the resulting path again.
David Fuhry's avatar
David Fuhry committed
  filename <- gsub(" ", "", paste0("data/annotations/", article.id, "-", article.rev.id, ".RDS"), fixed = TRUE)
David Fuhry's avatar
David Fuhry committed
  # Check if there is a cached version of the annotations for this article in this specific revision
David Fuhry's avatar
David Fuhry committed
  if(use.cache & file.exists(filename)) {
    res <- tryCatch({
      data <- readRDS(filename)
      data
    }, error = function (e) {
      cat("Cached data seems to be corrupted, redoing annotation.\n")
    })
    return(res)
  }

  annotation <- cleanNLP::cnlp_annotate(text, as_strings = TRUE)

David Fuhry's avatar
David Fuhry committed
  # Write cache if desired
David Fuhry's avatar
David Fuhry committed
  if(write.cache) {
    if (!dir.exists("data")) {
      dir.create("data")
    }
    if (!dir.exists("data/annotations")) {
      dir.create("data/annotations")
    }
    saveRDS(annotation, filename)
  }
David Fuhry's avatar
David Fuhry committed
  # Return data
  # On a side note: Should we do this? The tidyverse style guide discourages explicit returns.
  # But then again, it suggests snake case for variables...
David Fuhry's avatar
David Fuhry committed
  return(annotation)