Skip to content
Snippets Groups Projects
Commit efd32184 authored by Lukas Gehrke's avatar Lukas Gehrke
Browse files

Aendert if-Verzweigung in getBirthdate.R.

parent a40ac74b
No related branches found
No related tags found
1 merge request: !21 Resolve "R-Skript für Birthdate erstellen"
...@@ -14,36 +14,42 @@ library(xml2) ...@@ -14,36 +14,42 @@ library(xml2)
#' will be checked for birthdate
#'
#' @param article Article in HTML-format
#' @return String birthdate as string|NULL
getBirthdate <- function(article) {
  # Extracts a birthdate from an HTML article.
  # - If the markup contains "vcard" (an infobox), the content of the
  #   infobox row whose description matches "Born" is used.
  # - Otherwise the first parenthesised part of the introduction is used.
  # Returns NULL whenever neither source yields a birthdate.
  if (grepl("vcard", article)) {
    # Article has an infobox: read its "Born" field
    infoBox <- getInfoBox(article)
    birthdate <- infoBox[infoBox$Desc %like% "Born", ]$Content
    # No "Born" row: return NULL instead of an empty vector
    if (length(birthdate) == 0) {
      return(NULL)
    }
    # Remove everything except the birthdate:
    # - Remove everything in round brackets
    birthdate <- gsub("\\s*\\([^\\)]+\\)", "", birthdate)
    # - Remove everything starting with newline
    birthdate <- gsub("\\n.*$", "", birthdate)
    return(birthdate)
  }

  # No infobox: fall back to the first paragraph (fetched only once)
  introduction <- getIntroduction(article)
  has_introduction <- length(introduction) == 1 &&
    !is.na(introduction) &&
    nzchar(introduction)
  if (!has_introduction) {
    # Return NULL if there is no usable introduction
    return(NULL)
  }

  # Collect the parenthesised parts of the introduction
  candidates <- str_extract_all(introduction, "\\([^()]+\\)")[[1]]
  if (length(candidates) == 0) {
    # Return NULL if the introduction has no parenthesised part
    return(NULL)
  }

  # Only the first parenthesised part is treated as the birthdate;
  # strip its surrounding parentheses
  first_match <- candidates[1]
  substring(first_match, 2, nchar(first_match) - 1)
}
### Converts info box to table ### Converts info box to table
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment