diff --git a/r/GetBirthplace.R b/r/GetBirthplace.R index 227b5c129ab39afbf5a5988246836058d03747b3..8726598874e32feeffa2c015ec5bb43cfec90adc 100644 --- a/r/GetBirthplace.R +++ b/r/GetBirthplace.R @@ -8,7 +8,13 @@ library(rvest) library(stringr) library(data.table) -### This script extracts Birthplace from physicist texts +#' This script extracts Birthplace from physicist texts +#' Try to get the infobox and extract the birthplace +#' If there is no infobox, 0 will be returned as +#' birthplace is hard to extract from text +#' +#' @param article Article in HTML-format +#' @return String with birthplace of the physicist|0 getBirthplace <- function(article) { # If there is no infobox we return 0 @@ -19,18 +25,17 @@ getBirthplace <- function(article) { # Use infobox to get Birthplace infoBox <- getInfoBox(article) - # get Born field + # Get 'Born' field birthplace <- infoBox[infoBox$Desc %like% "Born",]$Content - # remove everything before the "\n" - # rest is birthplace + # Remove everything in front of the "\n" + # Rest is birthplace birthplace <- gsub(".*\\\n", "", birthplace) # return birthplace return(birthplace) } -### Uses Davids function to get infobox ### Converts info box to table getInfoBox <- function(article) { # Read page as html