Skip to content
Snippets Groups Projects
Commit 5e999dfd authored by Lucas Schons
Browse files

Merge branch 'master' into '32-add-unit-tests-for-cleanhtml-r'

# Conflicts:
#   processing/wikiproc/R/utils.R
parents 2a5f55f8 a45c02eb
No related branches found
No related tags found
1 merge request: !34 Resolve "Add unit tests for clean_html.R"
...@@ -63,7 +63,7 @@ getIntroduction <- function(article) { ...@@ -63,7 +63,7 @@ getIntroduction <- function(article) {
xml2::xml_add_sibling("p", "\n") xml2::xml_add_sibling("p", "\n")
xml2::xml_find_all(page, ".//br") %>% xml2::xml_find_all(page, ".//br") %>%
xml2::xml_remove xml2::xml_remove(.)
# Get all paragraphs # Get all paragraphs
paragraphs <- page %>% paragraphs <- page %>%
...@@ -73,7 +73,7 @@ getIntroduction <- function(article) { ...@@ -73,7 +73,7 @@ getIntroduction <- function(article) {
# There will be some leading paragraphs containing only "\n" # There will be some leading paragraphs containing only "\n"
# Remove those leading paragraphs # Remove those leading paragraphs
remove <- c("\n") remove <- c("\n")
cleaned <- data.table::setdiff(paragraphs, remove) cleaned <- setdiff(paragraphs, remove)
introduction <- cleaned[1] introduction <- cleaned[1]
# Return first paragraph # Return first paragraph
......
...@@ -26,8 +26,8 @@ get_infobox <- function(article) { ...@@ -26,8 +26,8 @@ get_infobox <- function(article) {
xml2::xml_add_sibling("p", "\n") xml2::xml_add_sibling("p", "\n")
xml2::xml_find_all(page, ".//br") %>% xml2::xml_find_all(page, ".//br") %>%
xml2::xml_remove() xml2::xml_remove(.)
# Get the info box # Get the info box
# Will throw an error if there isnt any, so that should be checked beforehand # Will throw an error if there isnt any, so that should be checked beforehand
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment