From efd32184ecbf610fd51df33f3da6d88dfed704fe Mon Sep 17 00:00:00 2001 From: Lukas Gehrke <lukasgehrke@Lukass-MacBook-Pro.local> Date: Mon, 7 Jan 2019 06:51:43 +0100 Subject: [PATCH] Aendert if-Verzweigung in getBirthdate.R. --- r/GetBirthdate.R | 56 +++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/r/GetBirthdate.R b/r/GetBirthdate.R index 21f61a5..42dbb69 100644 --- a/r/GetBirthdate.R +++ b/r/GetBirthdate.R @@ -14,36 +14,42 @@ library(xml2) #' will be checked for birthdate #' #' @param article Article in HTML-format -#' @return String birthdate as string +#' @return String birthdate as string|NULL getBirthdate <- function(article) { - # Check if there is an infobox - if(!grepl("vcard", article)) { - # Check first paragraph - introduction <- getIntroduction(article) - if(!introduction == "") { - # Get birthdate inside of parentheses - birthdate <- str_extract_all(introduction, "\\([^()]+\\)")[[1]] - # Remove parentheses - birthdate <- substring(birthdate, 2, nchar(birthdate)-1) - } else { - # Return Null if there is no birthdate - return(NULL) - } - } - # Try to get birthdate via infobox - infoBox <- getInfoBox(article) + if(grepl("vcard", article)) { - # Get the born field - birthdate <- infoBox[infoBox$Desc %like% "Born",]$Content + # Check if there is an infobox + infoBox <- getInfoBox(article) + + # Get the Born field + birthdate <- infoBox[infoBox$Desc %like% "Born",]$Content + # Remove everything except the birthdate: + # - Remove everything in round brackets + birthdate <- gsub("\\s*\\([^\\)]+\\)", "", birthdate) + # - Remove everything starting with newline + birthdate <- gsub("\\n.*$", "", birthdate) + + return(birthdate) - # Remove everything except the birthdate - # Remove everything in round brackets - birthdate <- gsub("\\s*\\([^\\)]+\\)", "", birthdate) - # Remove everything starting with newline - birthdate <- gsub("\\n.*$", "", birthdate) - return(birthdate) + } else if(!getIntroduction(article) == "") { + + # Check first paragraph + introduction <- getIntroduction(article) + + # Get birthdate inside of parentheses + birthdate <- str_extract_all(introduction, "\\([^()]+\\)")[[1]] + # Remove parentheses + birthdate <- substring(birthdate, 2, nchar(birthdate)-1) + + return(birthdate) + + } else { + + # Return Null if there is no birthdate + return(NULL) + } } ### Converts info box to table -- GitLab