#' will be checked for birthdate
#'
#' @param article Article in HTML-format
#' @return Character vector with the extracted birthdate text, or NULL if no
#'   birthdate could be extracted
getBirthdate <- function(article) {

  if (grepl("vcard", article)) {

    # The article contains an infobox ("vcard" marker): read it as a table
    infoBox <- getInfoBox(article)

    # Get the Born field
    birthdate <- infoBox[infoBox$Desc %like% "Born", ]$Content
    # Remove everything except the birthdate:
    # - Remove everything in round brackets
    birthdate <- gsub("\\s*\\([^\\)]+\\)", "", birthdate)
    # - Remove everything starting with newline
    birthdate <- gsub("\\n.*$", "", birthdate)

  } else {

    # No infobox: check the first paragraph instead. The introduction is
    # fetched once and reused for both the emptiness check and the extraction.
    introduction <- getIntroduction(article)

    # Guard against NULL / empty / NA results so the condition is always a
    # single TRUE or FALSE
    hasIntroduction <- length(introduction) == 1L &&
      !is.na(introduction) &&
      introduction != ""
    if (!hasIntroduction) {
      # Return NULL if there is no birthdate
      return(NULL)
    }

    # Get birthdate inside of parentheses
    birthdate <- str_extract_all(introduction, "\\([^()]+\\)")[[1]]
    # Remove parentheses
    birthdate <- substring(birthdate, 2, nchar(birthdate) - 1)
  }

  # Nothing matched (no "Born" row, or no parenthesised text): return NULL as
  # documented rather than a zero-length character vector
  if (length(birthdate) == 0L) {
    return(NULL)
  }

  birthdate
}

### Converts info box to table