diff --git a/r/GetData.R b/r/GetData.R
index a8d39bb0d84eff49b05d8f6f7e9deb51aeaf970a..a86c4c9b0e4bc183725ddf16a0b8eb446bbb19f7 100644
--- a/r/GetData.R
+++ b/r/GetData.R
@@ -9,68 +9,91 @@ library(rvest) # For getting the list of physicists
 ## Instead we crawl the names on the article 'List of Physicists' and query those names
 ## which gives us something short of a thousand articles
 
-### Get the list of names
-
-# Download page
-
-page <- read_html("https://en.wikipedia.org/wiki/List_of_physicists")
-
-# Extract links as the names given here are not the article names in about 20 cases
-
-physicists <- page %>%
-  html_nodes(".mw-parser-output li a") %>%
-  html_attr("href")
-
-# Clean the list
-
-physicists <- physicists[nchar(physicists) > 5]
-
-length(physicists) <- length(physicists) - 3
-
-physicists <- gsub("_", " ", physicists)
-
-physicists <- gsub("/wiki/", "", physicists)
-
-physicists <- gsub("\\s*\\([^\\)]+\\)", "", physicists)
-
-# This is probably only needed on windows (and perhaps os x) as R on windows messes quite a bit with the encoding
-# On linux `physicists <- URLdecode(physicists)` should do the trick
-
-physicists <- sapply(physicists, function(x) {
-  tmp <- URLdecode(x)
-  Encoding(tmp) <- "UTF-8"
-  tmp
-})
-
-names(physicists) <- NULL
-
-
-
-### Get articles
-
-# Call the wikipedia api for each entry in our list
-
-articles <- lapply(physicists, function(x) {
-  res <- tryCatch({
-    article <- page_content("en", "wikipedia", page_name = x, as_wikitext = FALSE)
-    data.frame(Title = article$parse$title,
-               PageID = article$parse$pageid,
-               RevID = article$parse$revid,
-               Text = article$parse$text$`*`,
-               stringsAsFactors = FALSE)
-  }, error = function(e) {
-    cat("Error: Crawling failed for article ", x, "with error message: ", conditionMessage(e),"\n")
+#' Retrieve Wikipedia articles about physicists
+#'
+#' @param use.cache Use cached data, if it exists, instead of downloading new data
+#' @param write.cache Write the downloaded results to the cache for use on future calls
+#' @return A data.frame containing the title, page ID, revision ID and HTML-formatted full text of each article
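+#' @examples
+#' \dontrun{
+#' # Illustrative call: reuse the cache when it exists, otherwise download the
+#' # articles from Wikipedia and write them to the cache for later runs
+#' articles <- getData(use.cache = TRUE, write.cache = TRUE)
+#' }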
+getData <- function(use.cache = TRUE, write.cache = FALSE) {
+  ### First we check if cached data already exists and try to load it if it does
+  if (use.cache && file.exists("data/articles.RDS")) {
+    res <- tryCatch({
+      readRDS("data/articles.RDS")
+    }, error = function(e) {
+      cat("Cached data was found but could not be loaded. Downloading from Wikipedia, this might take a few minutes.\n")
+      NULL
+    })
+    # Only return the cached data if it could actually be read;
+    # otherwise fall through and download the articles again
+    if (!is.null(res)) {
+      return(res)
+    }
+  }
+  
+  ### Get the list of names
+  
+  # Download page
+  
+  page <- read_html("https://en.wikipedia.org/wiki/List_of_physicists")
+  
+  # Extract the link targets, as the displayed names differ from the article names in about 20 cases
+  
+  physicists <- page %>%
+    html_nodes(".mw-parser-output li a") %>%
+    html_attr("href")
+  
+  # Clean the list
+  
+  # Keep only hrefs long enough to be "/wiki/<name>" article links
+  physicists <- physicists[nchar(physicists) > 5]
+  
+  # Drop the three trailing entries that are not part of the list itself
+  length(physicists) <- length(physicists) - 3
+  
+  # Turn the hrefs into plain article names
+  physicists <- gsub("_", " ", physicists)
+  
+  physicists <- gsub("/wiki/", "", physicists)
+  
+  # Strip parenthesised disambiguations such as "(physicist)"
+  physicists <- gsub("\\s*\\([^\\)]+\\)", "", physicists)
+  
+  # This is probably only needed on Windows (and perhaps OS X), as R on Windows messes quite a bit with the encoding
+  # On Linux `physicists <- URLdecode(physicists)` should do the trick
+  
+  physicists <- sapply(physicists, function(x) {
+    tmp <- URLdecode(x)
+    Encoding(tmp) <- "UTF-8"
+    tmp
   })
   
-})
-
-# Bind it all together
-
-articles <- do.call(rbind, articles)
-
-# Write result
+  names(physicists) <- NULL
+  
+  
+  
+  ### Get articles
+  
+  # Call the Wikipedia API for each entry in our list
+  
+  articles <- lapply(physicists, function(x) {
+    tryCatch({
+      article <- page_content("en", "wikipedia", page_name = x, as_wikitext = FALSE)
+      data.frame(Title = article$parse$title,
+                 PageID = article$parse$pageid,
+                 RevID = article$parse$revid,
+                 Text = article$parse$text$`*`,
+                 stringsAsFactors = FALSE)
+    }, error = function(e) {
+      cat("Error: Crawling failed for article", x, "with error message:", conditionMessage(e), "\n")
+      # Return NULL for failed articles; NULL entries are simply dropped by the rbind below
+      NULL
+    })
+  })
+  
+  # Bind it all together
+  
+  articles <- do.call(rbind, articles)
+  
+  # Write the results to the cache if desired
+  
+  if (write.cache) {
+    # Make sure the cache directory exists before writing to it
+    if (!dir.exists("data")) dir.create("data")
+    write.csv(articles, "data/articles.csv", row.names = FALSE)
+    saveRDS(articles, "data/articles.RDS")
+  }
+  
+  return(articles)
+}
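+
+# Illustrative top-level call, assuming the script is run with the project root
+# as the working directory so that the relative "data/" cache paths resolve:
+# articles <- getData(use.cache = TRUE, write.cache = TRUE)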
 
-write.table(articles, "../data/articles.csv")
 
-saveRDS(articles, "../data/articles.RDS")