#!/usr/bin/env Rscript

### This script runs the whole pipeline: data import, per-article processing and export for rasa

library(pbapply)
library(rvest)
library(wikiproc)

## Fetch data

cat("Starting data import...\n")

# Load the articles via getData().
# NOTE(review): judging by the argument names, this bypasses an existing cache
# (use.cache = FALSE) and writes the freshly fetched data back to the cache
# (write.cache = TRUE) -- confirm against the getData() documentation.
# data.dir is a relative path, so it is resolved against the working directory
# the script is started from.
articles <- getData(
  use.cache = FALSE,
  write.cache = TRUE,
  data.dir = "../../data/"
)

## Data processing

cat("Processing data:\n")

# Fail early with a clear message instead of running the per-article function
# on an empty data set and ending up with a NULL result.
if (NROW(articles) == 0) {
  stop("No articles were imported; nothing to process.", call. = FALSE)
}

# Process every article (one row of `articles`) and collect one single-row
# data frame per article. Iterating over row indices with pblapply() keeps the
# progress bar but avoids apply()'s coercion of the whole data frame to a
# character matrix.
results <- pblapply(seq_len(nrow(articles)), function(i) {
  # Columns are addressed by position: column 1 is the Title, column 2 the
  # PageID. NOTE(review): column 4 is presumably the article HTML -- confirm
  # against the structure returned by getData().
  # as.character() keeps the values plain character strings, as they were
  # when rows were taken from the coerced matrix.
  title <- as.character(articles[[1]][i])
  html <- as.character(articles[[4]][i])

  ## Data cleaning

  # NOTE(review): cleaned.text is currently only needed by the disabled
  # annotation step below.
  cleaned.text <- cleanHtml(html)

  ## Data preprocessing/annotating

  # annotation <- createAnnotations(cleaned.text, articles[[2]][i], articles[[3]][i])

  ## Extract information from Text

  # NOTE(review): this receives the raw HTML, not cleaned.text -- confirm that
  # this is intended.
  no.spouses <- getNoOfSpouses(html)

  ## Create Results

  # row.names = NULL prevents names carried by the inputs from becoming row
  # names, so the combined result gets plain 1..n row names.
  data.frame(Name = title,
             NoSpouses = no.spouses,
             row.names = NULL,
             stringsAsFactors = FALSE)
})

# Stack the per-article rows into a single data frame.
results <- do.call(rbind, results)

cat("Data processing finished.\n")

## The combined per-article results are now available in `results`

## Format for rasa 

cat("Writing rasa files to 'rasa/'...\n")

# TODO: the export step is still a placeholder; nothing is written until a
# formatting function is called here with `results`.
# someFormatFunction(results)

# Report the end of the export step (this used to repeat the message of the
# data processing step).
cat("Writing rasa files finished.\n")