# Author: Lucas

#' Extract the awards mentioned in a text about a physicist
#'
#' Looks through the named entities of an annotated text, keeps those that
#' contain an award-like keyword, normalises them and joins the distinct
#' results into one string.
#'
#' @param annotations annotations object from cleanNLP's createAnnotations()
#'   function.
#' @return A single string with all distinct awards found, separated by
#'   `", "`, or `NA` if no entity looks like an award.
#' @export
get_awards <- function(annotations) {
  ## Tokens that indicate that an entity deals with awards. "trophy" is only
  ## accepted next to a space, exactly as in the original keyword list.
  to_match <- c("award", "prize", "medal", "trophy ", " trophy")
  award_pattern <- paste(to_match, collapse = "|")

  ## We assume that awards are tagged as some kind of entity and thus only
  ## search the text's entity set.
  entities_obj <- cleanNLP::cnlp_get_entity(annotations)
  entities <- entities_obj$entity

  awards <- grep(award_pattern, entities, ignore.case = TRUE, value = TRUE)

  ## Normalise: drop a leading article (any capitalisation, so "The X Medal"
  ## and "the X Medal" collapse into one entry), replace punctuation that is
  ## followed by whitespace with a single space, and map every Nobel Prize
  ## variant onto one canonical label.
  awards <- gsub("^(the|a) ", "", awards, ignore.case = TRUE)
  awards <- gsub(" *[[:punct:]] +", " ", awards)
  awards <- gsub(".*Nobel Prize.*", "Nobel Prize", awards, ignore.case = TRUE)

  ## Trim before de-duplicating so that variants differing only in
  ## surrounding whitespace are merged.
  awards <- unique(trimws(awards))

  ## Decide on the awards vector itself: once collapsed, the joined value no
  ## longer tells us whether anything was found.
  if (length(awards) == 0L) {
    return(NA_character_)
  }

  stringi::stri_join(awards, collapse = ", ")
}