#!/usr/bin/env Rscript

### Master script: loads the helper scripts, then runs the whole pipeline

## Libraries

library(pbapply)

#library(SomeLibrary)

## Load Scripts

cat("Sourcing R scripts... ")

# Source every helper script in the listed order. source() evaluates in the
# global environment by default, so the definitions end up exactly where
# individual top-level source() calls would put them.
invisible(lapply(
  c(
    "r/GetData.R",
    "r/GetNoOfSpouses.R",
    "r/CleanHtml.R",
    "r/ProcessNER.R"
  ),
  source
))
#source("r/getSomethingElse.R")

cat("Done.\n")

## Fetch data

cat("Starting data import...\n")

# getData() comes from the sourced helper scripts.
# NOTE(review): use.cache = TRUE presumably reuses previously fetched
# articles -- confirm in r/GetData.R.
articles <- getData(use.cache = TRUE)

## Data processing

cat("Processing data:\n")

# Walk over the row indices instead of using pbapply(articles, 1, ...).
# apply() first converts the data frame with as.matrix(), which runs format()
# on non-character columns and can therefore pad values (e.g. numeric IDs)
# with blanks before they reach the helper functions. It also calls the
# function on a dummy row when there are zero rows. pblapply() keeps the
# progress bar and always returns a list.
results <- pblapply(seq_len(nrow(articles)), function(i) {
  # article is a named character vector representing a single row of articles.
  # This means article[1] represents the Title, article[2] the PageID etc.
  article <- vapply(articles[i, , drop = FALSE], as.character, character(1))

  ## Data cleaning

  cleaned.text <- cleanHtml(article[4])

  ## Data preprocessing/annotating

  # NOTE(review): annotation is not used further down in this script; the call
  # is kept because createAnnotations() may have side effects -- confirm.
  annotation <- createAnnotations(cleaned.text, article[2], article[3])

  ## Extract information from Text

  # Works on the original text in article[4], not on cleaned.text.
  no.spouses <- getNoOfSpouses(article[4])

  # someFact <- getFactFromTextFunctioN(annotated.text)

  # someOtherFact <- getOtherFactFromText(data$Text)

  ## Create Results

  # One single-row data frame per article; they are stacked below.
  data.frame(Name = article[1],
             NoSpouses = no.spouses,
             stringsAsFactors = FALSE)
})

# Stack the per-article rows into one data frame (NULL if there were no rows).
results <- do.call(rbind, results)

cat("Data processing finished.\n")

## Results are now in results

## Format for rasa

cat("Writing rasa files to 'rasa/'...\n")

# NOTE(review): the formatting call is still commented out, so nothing is
# written yet at this point.
# someFormatFunction(results)

# Completion message for this step. "Data processing finished." is already
# printed right after the processing loop, so it is not repeated here.
cat("Done.\n")