From 4616d41d99aa557f04c4d297b510e63da3e66e0e Mon Sep 17 00:00:00 2001 From: Lukas Gehrke <lukasgehrke@Lukass-MacBook-Pro.local> Date: Thu, 3 Jan 2019 14:29:33 +0100 Subject: [PATCH] Aendert Documentation. --- r/GetBirthplace.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/r/GetBirthplace.R b/r/GetBirthplace.R index 227b5c1..8726598 100644 --- a/r/GetBirthplace.R +++ b/r/GetBirthplace.R @@ -8,7 +8,13 @@ library(rvest) library(stringr) library(data.table) -### This script extracts Birthplace from physicist texts +#' This script extracts Birthplace from physicist texts +#' Try to get the infobox and extract the birthplace +#' If there is no infobox, 0 will be returned as +#' birthplace is hard to extract from text +#' +#' @param article Article in HTML-format +#' @return String with birthplace of the physicist|0 getBirthplace <- function(article) { # If there is no infobox we return 0 @@ -19,18 +25,17 @@ getBirthplace <- function(article) { # Use infobox to get Birthplace infoBox <- getInfoBox(article) - # get Born field + # Get 'Born' field birthplace <- infoBox[infoBox$Desc %like% "Born",]$Content - # remove everything before the "\n" - # rest is birthplace + # Remove everything in front of the "\n" + # Rest is birthplace birthplace <- gsub(".*\\\n", "", birthplace) # return birthplace return(birthplace) } -### Uses Davids function to get infobox ### Converts info box to table getInfoBox <- function(article) { # Read page as html -- GitLab