#!/usr/bin/env Rscript

### This script consolidates everything: it sources the helper scripts,
### imports the articles, processes every article and collects one result
### row per article in `results`.

## Libraries
library(pbapply)
#library(SomeLibrary)

## Load scripts
cat("Sourcing R scripts... ")
source("r/GetData.R")
source("r/GetNoOfSpouses.R")
source("r/CleanHtml.R")
source("r/ProcessNER.R")
#source("r/getSomethingElse.R")
cat("Done.\n")

## Fetch data
cat("Starting data import...\n")
articles <- getData(use.cache = TRUE)

## Data processing
cat("Processing data:\n")

# Iterate over row indices instead of calling apply() on the data frame.
# apply() would first coerce `articles` to a character matrix, which pads
# non-character columns (format()) and hands every row over as a named vector.
# Extracting single cells keeps the values clean; as.character() keeps the
# helpers receiving character strings, as they did before.
results <- pblapply(seq_len(nrow(articles)), function(i) {
  # Columns are addressed by position: 1 is the Title, 2 the PageID.
  # NOTE(review): column 4 is presumably the raw article text/HTML (it is
  # what gets cleaned below); the meaning of column 3 is not visible here.
  title <- as.character(articles[[i, 1]])
  page.id <- as.character(articles[[i, 2]])
  field.3 <- as.character(articles[[i, 3]])
  raw.text <- as.character(articles[[i, 4]])

  ## Data cleaning
  cleaned.text <- cleanHtml(raw.text)

  ## Data preprocessing/annotating
  # The returned annotation is not used yet; the call is kept because
  # createAnnotations() may do work beyond returning a value (not visible
  # from this script) -- TODO confirm.
  annotation <- createAnnotations(cleaned.text, page.id, field.3)

  ## Extract information from text
  # Note: this works on the raw text, not on the cleaned text.
  no.spouses <- getNoOfSpouses(raw.text)
  # someFact <- getFactFromTextFunctioN(annotated.text)
  # someOtherFact <- getOtherFactFromText(data$Text)

  ## Create results: one row per article
  data.frame(Name = title,
             NoSpouses = no.spouses,
             stringsAsFactors = FALSE)
})

# Combine the per-article rows into one data frame
# (this yields NULL when there are no articles at all).
results <- do.call(rbind, results)
cat("Data processing finished.\n")

## Results are now in results

## Format for rasa
cat("Writing rasa files to 'rasa/'...\n")
# someFormatFunction(results)
# This message used to repeat "Data processing finished." (copy-paste slip).
cat("Writing rasa files finished.\n")