#!/usr/bin/env Rscript

# Author: Lukas

#' Extract a physicist's birthplace from the infobox of an article
#'
#' The article is searched for the "vcard" marker that identifies an
#' infobox. When one is present, the infobox is parsed with `get_infobox()`
#' and the content of its "Born" row is reduced to the text that follows the
#' line break in that field. Free text is not parsed, because a birthplace is
#' hard to extract reliably from prose.
#'
#' @param article Article in HTML format.
#' @return The birthplace as a character string, or the number `0` when the
#'   article has no infobox or the infobox has no "Born" row.
#' @export
get_birthplace <- function(article) {

  # Without the infobox marker there is nothing to extract. any() keeps the
  # condition a single TRUE/FALSE for zero-length or multi-element input,
  # which would otherwise make `if` fail.
  if (!any(grepl("vcard", article, fixed = TRUE))) {
    return(0)
  }

  # Parse the infobox (get_infobox() is provided elsewhere in the project)
  info_box <- get_infobox(article)

  # Content of the 'Born' row
  born <- info_box[info_box$Desc %like% "Born", ]$Content

  # An infobox without a 'Born' row yields an empty result; fall back to the
  # same sentinel that is used when there is no infobox at all.
  if (length(born) == 0L) {
    return(0)
  }

  # Drop everything up to and including the newline; the remainder is the
  # birthplace.
  gsub(".*\\\n", "", born)
}