Skip to content
Snippets Groups Projects
Commit ecf20cc3 authored by Lukas Gehrke's avatar Lukas Gehrke
Browse files

Refactors get_birthplace to get_nationality.

parent 320531b1
No related branches found
No related tags found
1 merge request!52Resolve: "Refactor get_birthplace.R to get_nationality.R"
......@@ -43,13 +43,13 @@ results <- pbapply(articles, 1, function(article) {
awards_found <- get_awards(annotation)
university_found <- get_university(annotation)
birthdate_found <- get_birthdate(cleaned_text, annotation)
birthplace_found <- get_birthplace(cleaned_text, annotation)
nationality_found <- get_nationality(cleaned_text, annotation)
## Create Results
data.frame(name = article[1],
spouse = spouse_found,
birthplace = birthplace_found,
nationality = nationality_found,
birthdate = birthdate_found,
day_of_death = NA,
place_of_death = NA,
......
#!/usr/bin/env Rscript
# Author: Lukas
#' Extract the birthplace of a physicist from an article
#'
#' Cleans and annotates the article, looks up the first token "born" and
#' returns the first NORP entity that lies in the same sentence.
#'
#' NOTE(review): the lookup uses NORP entities, which is the same entity type
#' the nationality extraction relies on, so the result is presumably a
#' nationality rather than a place name -- confirm with the annotator docs.
#'
#' @export
#' @param article Article in HTML-format
#' @return String with birthplace of the physicist|NA
get_birthplace <- function(article) {
  # Clean the HTML, annotate it and fetch the token and entity tables
  text <- clean_html(article)
  annotations <- cnlp_annotate(text)
  tokens_obj <- cnlp_get_token(annotations)
  entities_obj <- cnlp_get_entity(annotations)

  # Positions of the token "born"; %in% never yields NA for missing words
  born_occurrence <- which(tokens_obj$word %in% "born")
  # The length of the index vector is what has to be compared with zero
  # (the comparison used to sit inside length(), which inverted the check)
  if (length(born_occurrence) == 0) {
    return(NA)
  }

  # Column 2 of the token table is the sentence id; `[[` returns a plain
  # scalar for data.frames and tibbles alike
  born_sid <- tokens_obj[[2]][born_occurrence[1]]

  # Keep NORP entities only; %in% drops rows whose entity type is NA
  norps <- entities_obj[entities_obj$entity_type %in% "NORP", ]

  # Assume the birthplace is the first NORP entity in the "born" sentence;
  # indexing an empty vector with [1] gives NA
  birthplace <- as.character(norps$entity[norps$sid %in% born_sid])[1]
  if (is.na(birthplace)) {
    NA
  } else {
    birthplace
  }
}
#!/usr/bin/env Rscript
# Author: Lukas
#' Extract the nationality of a physicist from annotated text
#'
#' Returns the first NORP entity found in the annotations. Entities shorter
#' than four characters are rejected because empty strings and garbage would
#' be returned otherwise.
#'
#' @export
#' @param article Article in HTML-format (currently unused; the lookup works
#'   on `annotations` only)
#' @param annotations CNLP Annotations
#' @return String with nationality of the physicist|NA
get_nationality <- function(article, annotations) {
  entities_obj <- cnlp_get_entity(annotations)

  # Keep NORP entities only; %in% (unlike ==) never yields NA, so entities
  # without a type cannot leak into the subset as all-NA rows
  norps <- entities_obj[entities_obj$entity_type %in% "NORP", ]

  # First NORP entity; as.character() keeps nchar() working on factor
  # columns, and indexing an empty vector with [1] gives NA
  nationality <- as.character(norps$entity)[1]

  # Scalar decision, so plain if/else instead of the vectorised ifelse()
  if (is.na(nationality) || nchar(nationality) < 4) {
    NA
  } else {
    nationality
  }
}
context("test-get_nationality")

test_that("retrieving nationality works", {
  # Read the raw HTML fixture in one piece
  filename_raw <- "article-4-raw.html"
  html <- readChar(filename_raw, file.info(filename_raw)$size)

  # get_nationality() has no default for `annotations`, so they are built
  # here by cleaning and annotating the article.
  # NOTE(review): assumes clean_html() and cnlp_annotate() are available in
  # the test environment -- confirm.
  annotations <- cnlp_annotate(clean_html(html))

  expected <- "Russian"
  actual <- get_nationality(html, annotations)

  # testthat convention: observed value first, expected value second
  expect_equal(actual, expected)
})
......@@ -10,7 +10,7 @@ Schema for Actions
class ActionSearchInfo(Action):
Starts Search Action for an Information, depending on
found intent (Intent 'birthplace' -> 'ActionSearchBirthplace')
found intent (Intent 'nationality' -> 'ActionSearchNationality')
- Obtains slot value of recognized physicist entity
- Iterates over 'data.tsv' to find demanded info for given physicist
- Stores value of info in its slot
......@@ -24,40 +24,40 @@ class ActionUtterInfo(Action):
"""
#
# Birthplace
# nationality
#
class ActionSearchNationality(Action):
    """Look up the nationality of the recognized physicist in 'data.tsv'.

    Reads the 'physicist' slot, scans 'data.tsv' for rows whose 'name'
    starts with that value (case-insensitive) and stores the row's
    'nationality' in the 'nationality' slot. The slot is set to
    "not known" when nothing usable was found.
    """

    def name(self):
        return 'action_search_nationality'

    def run(self, dispatcher, tracker, domain):
        import csv
        import re

        person = tracker.get_slot('physicist')
        actual_nationality = None
        if os.path.isfile('data.tsv'):
            # Without a recognized physicist there is nothing to look up;
            # compiling None would raise a TypeError.
            if person is not None:
                # The slot holds user text: escape it so characters such as
                # '.' or '(' are matched literally instead of as regex syntax.
                name_regex = re.compile(re.escape(person), re.IGNORECASE)
                with open('data.tsv') as csvfile:
                    spamreader = csv.DictReader(csvfile, delimiter='\t')
                    for row in spamreader:
                        # No break: the last matching row wins.
                        if name_regex.match(row['name']):
                            actual_nationality = row['nationality']
        else:
            dispatcher.utter_message("Error: Data not generated!")
        # Compare by value; `is not ""` tested object identity.
        found = actual_nationality is not None and actual_nationality != ""
        return [SlotSet('nationality',
                        actual_nationality if found else "not known")]
class ActionUtterNationality(Action):
    """Utter the nationality stored in the slots for the current physicist."""

    def name(self):
        return 'action_utter_nationality'

    def run(self, dispatcher, tracker, domain):
        # Read both slots in message order: physicist first, then nationality
        slot_values = [tracker.get_slot(slot)
                       for slot in ('physicist', 'nationality')]
        message = "Nationality of {} is {}.".format(*slot_values)
        dispatcher.utter_message(message)
        # No follow-up events are emitted
        return []
......
......@@ -2,7 +2,7 @@ intents:
- greet
- goodbye
- birthdate
- birthplace
- nationality
- day_of_death
- place_of_death
- is_alive
......@@ -16,8 +16,8 @@ intents:
actions:
- utter_greet
- utter_goodbye
- action_search_birthplace
- action_utter_birthplace
- action_search_nationality
- action_utter_nationality
- action_search_birthdate
- action_utter_birthdate
- action_search_day_of_death
......@@ -45,7 +45,7 @@ entities:
slots:
physicist:
type: text
birthplace:
nationality:
type: unfeaturized
birthdate:
type: unfeaturized
......
......@@ -6,11 +6,10 @@
- bye
- goodbye
## intent:birthplace
- where was [Albert Einstein](physicist) born
## intent:nationality
- what nation is [Albert Einstein](physicist) from
- what nationality does [Albert Einstein](physicist) have
- where is [Galileo Galilei](physicist) from
- what is [Albert Einstein](physicist) birthplace
- birthplace of [Galileo Galilei](physicist)
## intent:birthdate
- when was [Albert Einstein](physicist) born
......
......@@ -7,9 +7,9 @@
- utter_goodbye
## birthplace
* birthplace{"physicist": "albert einstein"}
- action_search_birthplace
- action_utter_birthplace
* nationality{"physicist": "albert einstein"}
- action_search_nationality
- action_utter_nationality
## birthdate
* birthdate{"physicist": "albert einstein"}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment