#!/usr/bin/env Rscript

# Author: Lukas

#' Extract the birthplace of a physicist from an article
#'
#' Reads the "Born" field of the article's infobox and keeps only the text
#' after the line break(s) inside that field, which is taken to be the
#' birthplace. The birthplace is hard to extract from free text, so 0 is
#' returned when the article has no infobox (no "vcard" marker) or when the
#' infobox has no "Born" field.
#'
#' @export
#' @param article Article in HTML-format
#' @return String with birthplace of the physicist, or 0 if it cannot be
#'   taken from an infobox
get_birthplace <- function(article) {
  # Without the "vcard" marker there is no infobox to read from
  if (!grepl("vcard", article)) {
    return(0)
  }

  # get_infobox() is defined elsewhere; it is used here as a table with
  # `Desc` (field label) and `Content` (field text) columns
  infobox <- get_infobox(article)

  # Keep the content of the row(s) whose label matches "Born"
  born <- infobox[infobox$Desc %like% "Born", ]$Content

  # An infobox without a "Born" row would otherwise yield an empty vector
  # instead of the documented 0 sentinel
  if (length(born) == 0L) {
    return(0)
  }

  # Remove everything in front of the line break; the rest is the birthplace
  gsub(".*\\\n", "", born)
}