From 189788339374e0a2aa9a68322824fcb6cb103209 Mon Sep 17 00:00:00 2001
From: Lulu Roth <ls80zyse@studserv.uni-leipzig.de>
Date: Sat, 12 Jan 2019 17:03:45 +0100
Subject: [PATCH] Rename files and functions to snake_case

---
 processing/wikiproc/NAMESPACE                 |  3 +
 processing/wikiproc/R/GetNoOfSpouses.R        | 63 -------------------
 .../wikiproc/R/{CleanHtml.R => clean_html.R}  |  2 +-
 ...eateAnnotations.R => create_annotations.R} |  2 +-
 .../R/{GetBirthdate.R => get_birthdate.R}     |  2 +-
 .../R/{GetBirthplace.R => get_birthplace.R}   |  2 +-
 .../man/{cleanHtml.Rd => clean_html.Rd}       |  8 +--
 processing/wikiproc/man/getIntroduction.Rd    |  2 +-
 .../man/{getBirthdate.Rd => get_birthdate.Rd} |  8 +--
 .../{getBirthplace.Rd => get_birthplace.Rd}   |  8 +--
 10 files changed, 20 insertions(+), 80 deletions(-)
 delete mode 100755 processing/wikiproc/R/GetNoOfSpouses.R
 rename processing/wikiproc/R/{CleanHtml.R => clean_html.R} (97%)
 rename processing/wikiproc/R/{createAnnotations.R => create_annotations.R} (91%)
 rename processing/wikiproc/R/{GetBirthdate.R => get_birthdate.R} (98%)
 rename processing/wikiproc/R/{GetBirthplace.R => get_birthplace.R} (97%)
 rename processing/wikiproc/man/{cleanHtml.Rd => clean_html.Rd} (84%)
 rename processing/wikiproc/man/{getBirthdate.Rd => get_birthdate.Rd} (82%)
 rename processing/wikiproc/man/{getBirthplace.Rd => get_birthplace.Rd} (82%)

diff --git a/processing/wikiproc/NAMESPACE b/processing/wikiproc/NAMESPACE
index 5e2056d..23ace1c 100644
--- a/processing/wikiproc/NAMESPACE
+++ b/processing/wikiproc/NAMESPACE
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+export(clean_html)
 export(create_annotations)
+export(get_birthdate)
+export(get_birthplace)
 export(get_data)
 export(get_no_of_spouses)
 export(init_nlp)
diff --git a/processing/wikiproc/R/GetNoOfSpouses.R b/processing/wikiproc/R/GetNoOfSpouses.R
deleted file mode 100755
index d5882ce..0000000
--- a/processing/wikiproc/R/GetNoOfSpouses.R
+++ /dev/null
@@ -1,63 +0,0 @@
-### GetNoOfSpouses.R
-### This extracts the number of spouses from the infobox
-### If no infobox or no information about spouses is found assumes there are none
-### Not for use in production, this does not actually get information from text
-
-# Author: David
-
-## Librarys
-
-library(rvest)
-library(data.table)
-library(xml2)
-
-### Get number of spouses
-#' @export
-getNoOfSpouses <- function(article) {
-
-  # If there is no infobox we assume there were no spouses
-  if(!grepl("vcard", article)) {
-    return(0)
-  }
-
-  infoBox <- getInfoBox(article)
-
-  # Get the spouse field
-  spouses <- infoBox[infoBox$Desc %like% "Spouse",]$Content
-  # Remove everything in parentheses
-  spouses <- gsub("\\s*\\([^\\)]+\\)", "", spouses)
-  # Split the strings by newlines to get one spouse per line
-  spouses <- base::strsplit(spouses, "\n")
-  spouses <- base::unlist(spouses)
-  if(length(spouses) > 0) {
-    return(length(spouses))
-  }
-  return(0)
-}
-
-### Converts info box to table
-getInfoBox <- function(article) {
-  # Read page as html
-  page <- xml2::read_html(article)
-
-  # Extracting text from the html will erase all <br> tags,
-  # this will replace them with line breaks
-
-  xml2::xml_find_all(page, ".//br") %>%
-    xml2::xml_add_sibling("p", "\n")
-
-  xml2::xml_find_all(page, ".//br") %>%
-    xml2::xml_remove()
-
-  # Get the info box
-  # Will throw an error if there isnt any, so that should be checked beforehand
-
-  table <- page %>%
-    rvest::html_nodes("table.vcard") %>%
-    rvest::html_table(fill = TRUE) %>%
-    .[[1]]
-
-  colnames(table) <- c("Desc", "Content")
-
-  return(table)
-}
diff --git a/processing/wikiproc/R/CleanHtml.R b/processing/wikiproc/R/clean_html.R
similarity index 97%
rename from processing/wikiproc/R/CleanHtml.R
rename to processing/wikiproc/R/clean_html.R
index e541a1d..d0a57c2 100644
--- a/processing/wikiproc/R/CleanHtml.R
+++ b/processing/wikiproc/R/clean_html.R
@@ -9,7 +9,7 @@
 #' @export
 #' @param html Url linking to a wikipedia webpage or a html formatted document.
 #' @return Plaintext document containing only the maintext of the give wikipedia page.
-cleanHtml <- function(html) {
+clean_html <- function(html) {
 
   # 1. read data from url or html-formatted text
   # 2 .extract nodes containing main information (ignore infoboxes, list of literature, ...)
diff --git a/processing/wikiproc/R/createAnnotations.R b/processing/wikiproc/R/create_annotations.R
similarity index 91%
rename from processing/wikiproc/R/createAnnotations.R
rename to processing/wikiproc/R/create_annotations.R
index 6deb6a5..bbe1642 100644
--- a/processing/wikiproc/R/createAnnotations.R
+++ b/processing/wikiproc/R/create_annotations.R
@@ -1,7 +1,7 @@
 library(cleanNLP)
 
 #' @export
-createAnnotations <- function(text, article.id, article.rev.id, use.cache = TRUE, write.cache = FALSE) {
+create_annotations <- function(text, article.id, article.rev.id, use.cache = TRUE, write.cache = FALSE) {
 
   # Generate filename, for some reason there paste0 will pad the article id with leading whitespaces
   # To prevent this we stip 'em again
diff --git a/processing/wikiproc/R/GetBirthdate.R b/processing/wikiproc/R/get_birthdate.R
similarity index 98%
rename from processing/wikiproc/R/GetBirthdate.R
rename to processing/wikiproc/R/get_birthdate.R
index dd980b5..3288e2d 100644
--- a/processing/wikiproc/R/GetBirthdate.R
+++ b/processing/wikiproc/R/get_birthdate.R
@@ -16,7 +16,7 @@ library(xml2)
 #' @export
 #' @param article Article in HTML-format
 #' @return String birthdate as string|NULL
-getBirthdate <- function(article) {
+get_birthdate <- function(article) {
 
   if(grepl("vcard", article)) {
 
diff --git a/processing/wikiproc/R/GetBirthplace.R b/processing/wikiproc/R/get_birthplace.R
similarity index 97%
rename from processing/wikiproc/R/GetBirthplace.R
rename to processing/wikiproc/R/get_birthplace.R
index 652217f..61839e0 100644
--- a/processing/wikiproc/R/GetBirthplace.R
+++ b/processing/wikiproc/R/get_birthplace.R
@@ -16,7 +16,7 @@ library(data.table)
 #' @export
 #' @param article Article in HTML-format
 #' @return String with birthplace of the physicist|0
-getBirthplace <- function(article) {
+get_birthplace <- function(article) {
 
   # If there is no infobox we return 0
   if(!grepl("vcard", article)) {
diff --git a/processing/wikiproc/man/cleanHtml.Rd b/processing/wikiproc/man/clean_html.Rd
similarity index 84%
rename from processing/wikiproc/man/cleanHtml.Rd
rename to processing/wikiproc/man/clean_html.Rd
index 7247852..3f49f48 100644
--- a/processing/wikiproc/man/cleanHtml.Rd
+++ b/processing/wikiproc/man/clean_html.Rd
@@ -1,12 +1,12 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/CleanHtml.R
-\name{cleanHtml}
-\alias{cleanHtml}
+% Please edit documentation in R/clean_html.R
+\name{clean_html}
+\alias{clean_html}
 \title{Clean a html formatted wikipedia page.
 Nodes of interest from the DOM are extracted and then cleaned from all html
 tags and annotations.}
 \usage{
-cleanHtml(html)
+clean_html(html)
 }
 \arguments{
 \item{html}{Url linking to a wikipedia webpage or a html formatted document.}
diff --git a/processing/wikiproc/man/getIntroduction.Rd b/processing/wikiproc/man/getIntroduction.Rd
index 3dfe196..5778a54 100644
--- a/processing/wikiproc/man/getIntroduction.Rd
+++ b/processing/wikiproc/man/getIntroduction.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/GetBirthdate.R
+% Please edit documentation in R/get_birthdate.R
 \name{getIntroduction}
 \alias{getIntroduction}
 \title{Get Introduction Text from Wikipedia page that contains birthdate}
diff --git a/processing/wikiproc/man/getBirthdate.Rd b/processing/wikiproc/man/get_birthdate.Rd
similarity index 82%
rename from processing/wikiproc/man/getBirthdate.Rd
rename to processing/wikiproc/man/get_birthdate.Rd
index a614ade..1e77780 100644
--- a/processing/wikiproc/man/getBirthdate.Rd
+++ b/processing/wikiproc/man/get_birthdate.Rd
@@ -1,14 +1,14 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/GetBirthdate.R
-\name{getBirthdate}
-\alias{getBirthdate}
+% Please edit documentation in R/get_birthdate.R
+\name{get_birthdate}
+\alias{get_birthdate}
 \title{Extract birthdate from infobox
 Will try to get infobox as table and extract birthdate
 from 'Born'-entry
 If there is no infobox, first paragraph of the article
 will be checked for birthdate}
 \usage{
-getBirthdate(article)
+get_birthdate(article)
 }
 \arguments{
 \item{article}{Article in HTML-format}
diff --git a/processing/wikiproc/man/getBirthplace.Rd b/processing/wikiproc/man/get_birthplace.Rd
similarity index 82%
rename from processing/wikiproc/man/getBirthplace.Rd
rename to processing/wikiproc/man/get_birthplace.Rd
index b1b0830..0db95fe 100644
--- a/processing/wikiproc/man/getBirthplace.Rd
+++ b/processing/wikiproc/man/get_birthplace.Rd
@@ -1,13 +1,13 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/GetBirthplace.R
-\name{getBirthplace}
-\alias{getBirthplace}
+% Please edit documentation in R/get_birthplace.R
+\name{get_birthplace}
+\alias{get_birthplace}
 \title{This script extracts Birthplace from physicist texts
 Try to get the infobox and extract the birthplace
 If there is no infobox, 0 will be returned as
 birthplace is hard to extract from text}
 \usage{
-getBirthplace(article)
+get_birthplace(article)
 }
 \arguments{
 \item{article}{Article in HTML-format}
-- 
GitLab