Newer
Older
# Author: Lucas
#' Try to extract every award mentioned in a text about a physicist
#'
#' Takes the named entities reported by \code{cleanNLP::cnlp_get_entity()},
#' keeps those that contain an award-related keyword, normalises them and
#' joins the distinct results into a single comma-separated string.
#'
#' @export
#' @param annotations annotations object that
#'   \code{cleanNLP::cnlp_get_entity()} can extract entities from (the
#'   original documentation names cleanNLP's createAnnotations() as its
#'   source).
#' @return A single string with all distinct awards found, separated by
#'   \code{", "}, or \code{NA_character_} if no award was found.
get_awards <- function(annotations) {
  # Tokens that indicate an entity names an award. The two "trophy" patterns
  # require an adjacent space, so an entity that is just "Trophy" does not
  # match.
  to_match <- c("award", "prize", "medal", "trophy ", " trophy")
  award_pattern <- paste(to_match, collapse = "|")

  # We assume that awards are tagged as some kind of entity, so only the
  # text's entity set is searched rather than the full token stream.
  entities <- cleanNLP::cnlp_get_entity(annotations)[["entity"]]
  awards <- grep(award_pattern, entities, ignore.case = TRUE, value = TRUE)

  # Drop a leading article. Case-insensitive so that "The X Prize" and
  # "the X Prize" collapse into one entry instead of being listed twice.
  awards <- gsub("^(the|a) ", "", awards, ignore.case = TRUE)
  # Replace punctuation that is followed by whitespace with a single space.
  awards <- gsub(" *[[:punct:]] +", " ", awards)
  # Collapse every variant that mentions the Nobel Prize into one label.
  awards <- gsub(".*Nobel Prize.*", "Nobel Prize", awards, ignore.case = TRUE)
  awards <- unique(trimws(awards))

  # Explicit empty guard: return a typed missing value rather than relying
  # on how the join behaves for zero-length input.
  if (length(awards) == 0L) {
    return(NA_character_)
  }
  stringi::stri_join(awards, collapse = ", ")
}