From 39ae3a9edcf31ee9af0357290a71733835d74c0a Mon Sep 17 00:00:00 2001
From: Lukas Gehrke <lukasgehrke@Lukass-MacBook-Pro.local>
Date: Thu, 3 Jan 2019 14:23:10 +0100
Subject: [PATCH] Aendert Documentation.

---
 r/GetBirthdate.R | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/r/GetBirthdate.R b/r/GetBirthdate.R
index dbe0cdd..efe63b1 100644
--- a/r/GetBirthdate.R
+++ b/r/GetBirthdate.R
@@ -1,56 +1,57 @@
 #!/usr/bin/env Rscript
-### Extracts birthdate fronm the infobox
-### If there is no infobox the Introduction text can be checked
 
 # Author: Lukas
 
-## Librarys
-
 library(rvest)
 library(stringr)
 library(data.table)
 
-### Try to extract birthdate from infobox
-### If there is no infobox, try to extract from introduction text
+#' Extract birthdate from infobox
+#' Will try to get infobox as table and extract birthdate
+#' from 'Born'-entry
+#' If there is no infobox, first paragraph of the article
+#' will be checked for birthdate
+#'
+#' @param article Article in HTML-format
+#' @return Birthdate as string; 0 if there is neither infobox nor introduction
 getBirthdate <- function(article) {
-  # check
+  # Check if there is an infobox
   if(!grepl("vcard", article)) {
-    # check first paragraph
+    # Check first paragraph
     introduction <- getIntroduction(article)
     if(!introduction == "") {
-      # get birthdate inside of parentheses
+      # Get birthdate inside of parentheses
       birthdate <- str_extract_all(introduction, "\\([^()]+\\)")[[1]]
-      # remove parentheses
+      # Remove parentheses
       birthdate <- substring(birthdate, 2, nchar(birthdate)-1)
     } else {
-      # retrun Null if there is no birthdate
+      # Return 0 if there is no introduction text to search
       return(0)
     }
   }
   
-  # try to get birthdate via infobox
+  # Try to get birthdate via infobox
   infoBox <- getInfoBox(article)
   
-  # get the born field
+  # Get the born field
   birthdate <- infoBox[infoBox$Desc %like% "Born",]$Content
   
-  # remove everything except the birthdate
-  # remove everything in round brackets
+  # Remove everything except the birthdate
+  # Remove everything in round brackets
   birthdate <- gsub("\\s*\\([^\\)]+\\)", "", birthdate)
-  # remove everything starting with newline
+  # Remove everything starting with newline
   birthdate <- gsub("\\n.*$", "", birthdate)
   return(birthdate)
   
 }
 
-### Uses Davids function to get infobox
 ### Converts info box to table
 getInfoBox <- function(article) {
   # Read page as html
   page <- read_html(article)
   
   # Extracting text from the html will erase all <br> tags,
-  # this will replace them with line breaks
+  # so a paragraph holding a line break is added next to each of them
   
   xml_find_all(page, ".//br") %>%
     xml_add_sibling("p", "\n")
@@ -71,7 +72,10 @@ getInfoBox <- function(article) {
   return(table)
 }
 
-### Get Introduction Text from Wikipedia page that contains birthdate
+#' Get Introduction Text from Wikipedia page that contains birthdate
+#'
+#' @param article article in HTML-format
+#' @return string introduction text from Wikipedia article
 getIntroduction <- function(article) {
   # Read page as html
   page <- read_html(article)
@@ -90,8 +94,8 @@ getIntroduction <- function(article) {
     html_nodes("p") %>%
     html_text()
   
-  # there will be some leading paragraphs containing only "\n"
-  # remove those leading paragraphs
+  # There will be some leading paragraphs containing only "\n"
+  # Remove those leading paragraphs
   remove <- c("\n")
   cleaned <- setdiff(paragraphs, remove)
   introduction <- cleaned[1]
-- 
GitLab