### Utility functions used internally

#' Extract the infobox contents from a Wikipedia article
#'
#' Parses the article HTML, replaces every <br> tag with a line break so that
#' multi-line cells stay readable after text extraction, and converts the
#' first table matching the CSS selector "table.vcard" (the infobox) into a
#' data frame.
#'
#' @param article Character vector containing the contents of a Wikipedia
#' article as HTML
#'
#' @return Data frame holding the contents of the infobox table, with its
#' columns renamed to "Desc" and "Content". An error is raised when the
#' article contains no "table.vcard" element.
#'
#' @examples
#' \dontrun{
#' articles <- get_data()
#'
#' infobox <- get_infobox(articles$Text[54])
#'
#' infobox[3:4,]
#' }
get_infobox <- function(article) {
  # Read page as html
  page <- xml2::read_html(article)

  # Extracting text from the html would erase all <br> tags. Put a <p>
  # holding a line break next to each one, then drop the <br> itself.
  # The node set is looked up once and reused for both steps.
  line_breaks <- xml2::xml_find_all(page, ".//br")
  xml2::xml_add_sibling(line_breaks, "p", "\n")
  xml2::xml_remove(line_breaks)

  # Locate the info box. Fail with a descriptive message instead of an
  # opaque "subscript out of bounds" when the article has none.
  infoboxes <- rvest::html_nodes(page, "table.vcard")
  if (length(infoboxes) == 0L) {
    stop("No infobox (table.vcard) found in article", call. = FALSE)
  }

  # Only the first match is used, so only that one is converted
  infobox <- rvest::html_table(infoboxes[[1]], fill = TRUE)

  # NOTE(review): assumes the infobox table has exactly two columns - confirm
  colnames(infobox) <- c("Desc", "Content")

  infobox
}