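# NOTE(review): the stray tokens "Newer" / "Older" appeared here ahead of the
# test code; presumably page-navigation residue — confirm and remove.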
# Label the tests in this file as "test-clean_html" in testthat's output.
# NOTE(review): context() is deprecated in testthat 3e (the file name is used
# instead) — confirm the package's testthat edition before removing this call.
context("test-clean_html")
test_that("html cleansing works", {
filename_raw <- "article-4-raw.html"
filename_cleansed <- "article-4-cleansed.txt"
html <- readChar(filename_raw, file.info(filename_raw)$size)
expected <- gsub("\\s", "", readChar(filename_cleansed, file.info(filename_cleansed)$size))
actual <- gsub("\\s", "", clean_html(html))