-
Lucas Schons authored
* Create R package containing function definitions * Create directory processing containing scripts * fix some typos * general cleanup
Lucas Schons authored: * Create R package containing function definitions * Create directory processing containing scripts * fix some typos * general cleanup
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Master.R 1.13 KiB
#!/usr/bin/env Rscript
### This script consolidates everything
library(pbapply)
library(rvest)
library(wikiproc)
## Fetch data
# The cache flags and data directory are handed straight to wikiproc's
# getData(); presumably this forces a fresh download and rewrites the cache
# under data.dir -- confirm against the getData() documentation.
# Note that data.dir is relative, so it depends on the working directory the
# script is started from.
cat("Starting data import...\n")
articles <- getData(use.cache = FALSE, write.cache = TRUE, data.dir = "../../data/")

## Data processing
cat("Processing data:\n")
# Walk over row indices with pblapply() instead of pbapply(articles, 1, ...).
# apply()-style iteration over a data frame first coerces the whole frame to a
# character matrix, calls the function once on a dummy row when the frame has
# no rows, and passes named elements whose names end up as row names of the
# per-article result. Index-based iteration avoids all three while keeping the
# progress bar.
results <- pblapply(seq_len(nrow(articles)), function(i) {
  # Columns are addressed by position, as in the original script:
  # column 1 is the Title, column 2 the PageID, and column 4 is the text that
  # is cleaned and analysed below (presumably the article HTML -- verify
  # against getData()).
  title <- as.character(articles[[1L]][[i]])
  raw.text <- as.character(articles[[4L]][[i]])

  ## Data cleaning
  # NOTE(review): cleaned.text is not consumed yet; it is only needed once the
  # annotation step below is enabled.
  cleaned.text <- cleanHtml(raw.text)

  ## Data preprocessing/annotating
  # annotation <- createAnnotations(cleaned.text, articles[[2L]][[i]], articles[[3L]][[i]])

  ## Extract information from Text
  # The extraction works on the uncleaned text, not on cleaned.text.
  no.spouses <- getNoOfSpouses(raw.text)

  ## Create Results
  # row.names = NULL keeps automatic row numbering in the combined table.
  data.frame(Name = title,
             NoSpouses = no.spouses,
             row.names = NULL,
             stringsAsFactors = FALSE)
})
# Stack the one-row data frames into a single table. With no articles the
# list is empty and do.call(rbind, ...) yields NULL.
results <- do.call(rbind, results)
cat("Data processing finished.\n")

## Results are now in results

## Format for rasa
cat("Writing rasa files to 'rasa/'...\n")
# someFormatFunction(results)
# NOTE(review): the formatting step above is still a stub, so nothing is
# written yet, and the message below repeats the earlier status line --
# confirm the intended wording once the step is implemented.
cat("Data processing finished.\n")