# get_awards.R
# Author: Lucas

#' Try to extract every award that a given text about a physicist mentions
#'
#' Filters the entities of an annotated text down to those containing an
#' award-like keyword ("award", "prize", "medal", "trophy"), normalises them
#' and returns the distinct results joined into one string.
#'
#' @export
#' @param annotations annotation object as accepted by
#'   `cleanNLP::cnlp_get_entity()`.
#' @return A single string with all distinct awards found, separated by
#'   `", "`, or `NA` if no entity looks like an award.
get_awards <- function(annotations) {

  ## list of tokens that indicate that an entity deals with awards; the two
  ## "trophy" variants only match when the word is adjacent to a space
  to_match <- c("award", "prize", "medal", "trophy ", " trophy")

  ## we assume that awards are being tagged as some kind of entity and thus
  ## only filter the text's entity set
  entities_obj <- cleanNLP::cnlp_get_entity(annotations)
  ## `[[` instead of `$` so the column name is never partially matched
  entities <- entities_obj[["entity"]]

  pattern <- paste(to_match, collapse = "|")
  awards <- entities[grep(pattern, entities, ignore.case = TRUE)]

  ## decide "nothing found" here, on the vector itself, instead of relying on
  ## what the join below returns for zero-length input
  if (length(awards) == 0) {
    return(NA)
  }

  ## drop a leading lower-case article
  awards <- gsub("^(the|a) ", "", awards)
  ## replace punctuation that is followed by whitespace with a single space
  awards <- gsub(" *[[:punct:]] +", " ", awards)
  ## every entity mentioning the Nobel Prize collapses to one canonical name
  awards <- gsub(".*Nobel Prize.*", "Nobel Prize", awards, ignore.case = TRUE)
  awards <- trimws(awards)

  stringi::stri_join(unique(awards), collapse = ", ")
}