### GetNoOfSpouses.R
### This extracts the number of spouses from the infobox.
### If no infobox or no information about spouses is found, it assumes there
### are none.
### Not for use in production: this does not actually get information from
### the article text.
# Author: David

#' Reads the number of spouses from the infobox of a Wikipedia article
#'
#' The article is considered to have an infobox when its HTML contains the
#' string "vcard". The infobox is then obtained via `get_infobox()` (defined
#' elsewhere in the package), every row whose `Desc` matches "Spouse" is
#' selected, parenthesised remarks are stripped from its `Content`, and the
#' remaining non-blank lines are counted.
#'
#' @param article Wikipedia article in html format (a single string)
#'
#' @return Integer indicating the number of spouses; `0L` when there is no
#'   infobox or no spouse entry.
#' @export
#'
#' @examples
#' \dontrun{
#' articles <- get_data()
#'
#' no.spouses <- get_no_of_spouses(articles$Text[54])
#'
#' no.spouses
#' }
get_no_of_spouses <- function(article) {
  # `if ()` needs a scalar condition; fail with a clear message instead of an
  # opaque "argument is of length zero" / "condition has length > 1" error.
  if (length(article) != 1L) {
    stop("`article` must be a single HTML string.", call. = FALSE)
  }

  # If there is no infobox we assume there were no spouses
  if (!grepl("vcard", article, fixed = TRUE)) {
    return(0L)
  }

  info_box <- get_infobox(article)

  # Get the spouse field. grepl() yields FALSE for NA descriptions, so the
  # mask never contains NA and no data.table `%like%` is required.
  is_spouse_row <- grepl("Spouse", info_box$Desc)
  spouses <- as.character(info_box$Content[is_spouse_row])

  # Remove everything in parentheses (e.g. marriage/divorce years)
  spouses <- gsub("\\s*\\([^\\)]+\\)", "", spouses)

  # Split the strings by newlines to get one spouse per line
  spouses <- trimws(unlist(strsplit(spouses, "\n", fixed = TRUE)))

  # Lines that are blank (left over after stripping a parenthetical) or NA
  # are not spouses; sum() over a logical vector is always an integer.
  sum(!is.na(spouses) & nzchar(spouses))
}