#' Initialize the nlp backend
#'
#' A wrapper used to set the python environment and call cnlp_init
#'
#' @param type Type of python env to use, either "conda" or "python". Any
#'   other value leaves the python environment selection untouched.
#' @param value Connection string, if using a conda environment the name of it
#'   if using python directly the path to the python executable
#'
#' @return Does not return data
#' @export
#'
#' @examples
#' \dontrun{
#' init_nlp("conda", "spcy")
#' }
init_nlp <- function(type, value) {
  # Bind reticulate to the requested interpreter before spaCy is initialised.
  if (type == "conda") {
    reticulate::use_condaenv(value, required = TRUE)
  } else if (type == "python") {
    reticulate::use_python(value, required = TRUE)
  }

  cleanNLP::cnlp_init_spacy(entity_flag = TRUE)
}

#' Create annotations for the given text
#'
#' Annotates `text` via `cleanNLP::cnlp_annotate()`. When an article id and a
#' revision id are supplied, the result can be read from and/or written to an
#' RDS cache file located at
#' `<data.dir>/annotations/<article.id>-<article.rev.id>.RDS`.
#'
#' @param text Text to annotate
#' @param article.id ArticleID used for caching. **Optional** as long as cache
#'   is not to be used.
#' @param article.rev.id ArticleRevisionID used for caching. **Optional** as
#'   long as cache is not to be used.
#' @param use.cache Should cached data be used
#' @param write.cache Should the generated annotations be cached
#' @param data.dir Directory the data should be read from and/or written to
#'
#' @return Annotation object for use with cleanNLP methods
#' @export
create_annotations <- function(text, article.id, article.rev.id,
                               use.cache = TRUE, write.cache = FALSE,
                               data.dir = "data") {
  # Caching is only possible when both identifiers were supplied.
  has_ids <- !missing(article.id) && !missing(article.rev.id)
  cache_file <- NULL

  if (use.cache || write.cache) {
    if (has_ids) {
      # The ids may arrive padded with whitespace (e.g. formatted numbers).
      # Strip blanks from the generated file name only, so that a data
      # directory containing spaces is left intact.
      cache_name <- gsub(
        " ", "",
        paste0(article.id, "-", article.rev.id, ".RDS"),
        fixed = TRUE
      )
      cache_file <- file.path(data.dir, "annotations", cache_name)

      # Check if there is a cached version of the annotations for this
      # article in this specific revision.
      if (use.cache && file.exists(cache_file)) {
        # Wrap the value in a list so a failed read (NULL) can be told apart
        # from a successfully read object.
        cached <- tryCatch(
          list(value = readRDS(cache_file)),
          error = function(e) {
            message("Cached data seems to be corrupted, redoing annotation.")
            NULL
          }
        )
        if (!is.null(cached)) {
          return(cached$value)
        }
        # Unreadable cache: fall through and annotate from scratch.
      }
    } else if (use.cache) {
      warning("use.cache was set to true without providing article id and revision id, cache will not be used.")
    }
  }

  annotation <- cleanNLP::cnlp_annotate(text, as_strings = TRUE)

  # Write cache if desired
  if (write.cache) {
    if (has_ids) {
      cache_dir <- dirname(cache_file)
      if (!dir.exists(cache_dir)) {
        # recursive = TRUE also creates data.dir (and its parents) if needed.
        dir.create(cache_dir, recursive = TRUE)
      }
      saveRDS(annotation, cache_file)
    } else {
      warning("write.cache was set to true without providing article id and revision id, cache will not be written")
    }
  }

  annotation
}