Newer
Older
#!/usr/bin/env Rscript
# Author: Lukas
#' Extract birthdate
#' The first sentences of the article are checked for a birthdate,
#' which is extracted via NER using a pattern.
#' Idea: the first date to appear is the birthdate.
#' @param annotations CNLP Annotations
#' @return Birthdate as a string, or NA
get_birthdate <- function(article, annotations) {
# Get birthdate from first Date entity in article
# - Get named entities
# - Get dates
# - Get first date
# - Trim the date of death, which might follow after "–" (en dash) or "−" (minus sign)
# - Trim parentheses that might occur
# - Trim whitespaces
entities <- cleanNLP::cnlp_get_entity(annotations)
dates <- entities[entities$entity_type == "DATE",]
birthdate <- dates$entity[1]
birthdate <- gsub("[–|−].*", "", birthdate)
birthdate <- gsub("[\\(|\\)]", "", birthdate)
birthdate <- trimws(birthdate)