Newer
Older
#!/usr/bin/env Rscript
# Author: Lukas
#' Extract the birthplace of a physicist from an article
#'
#' Looks up the "Born" row of the article's infobox and keeps only the text
#' after the last line break of that field, which is taken to be the
#' birthplace. Extracting a birthplace from free text is unreliable, so `0`
#' is returned whenever no infobox (or no "Born" row) is available.
#'
#' @export
#' @param article Article in HTML-format
#' @return String with the birthplace of the physicist, or `0` if the article
#'   has no infobox or the infobox has no "Born" field
get_birthplace <- function(article) {
  # The infobox is recognised by the "vcard" marker in the HTML. any() keeps
  # the condition a single TRUE/FALSE for empty or multi-element input.
  if (!any(grepl("vcard", article, fixed = TRUE))) {
    return(0)
  }

  # get_infobox() is defined elsewhere in the project; its result is used
  # here through the `Desc` (field label) and `Content` (field text) columns.
  info_box <- get_infobox(article)

  # Content of every row whose label contains "Born"
  is_born <- grepl("Born", info_box$Desc, fixed = TRUE)
  born_field <- info_box$Content[is_born]

  # Infobox without a "Born" row: use the same sentinel as "no infobox"
  if (length(born_field) == 0L) {
    return(0)
  }

  # Drop everything up to and including the last line break; the remainder
  # is the birthplace.
  gsub(".*\\\n", "", born_field)
}