diff --git a/docs/abschluss_presentation/Wiki_Chatbot_Architecture.png b/docs/abschluss_presentation/Wiki_Chatbot_Architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d191ed84cf5359ee17b1ffaa1ddb86a66a5737c
Binary files /dev/null and b/docs/abschluss_presentation/Wiki_Chatbot_Architecture.png differ
diff --git a/docs/abschluss_presentation/aarons.png b/docs/abschluss_presentation/aarons.png
new file mode 100644
index 0000000000000000000000000000000000000000..0f69d0517d9bbb9f5640b8b5abb3dcc80eb8c313
Binary files /dev/null and b/docs/abschluss_presentation/aarons.png differ
diff --git a/docs/abschluss_presentation/chatbot.png b/docs/abschluss_presentation/chatbot.png
new file mode 100644
index 0000000000000000000000000000000000000000..b51ed5127fed740066bfca81ee0e12946eb3e27c
Binary files /dev/null and b/docs/abschluss_presentation/chatbot.png differ
diff --git a/docs/abschluss_presentation/eval_spouse.R b/docs/abschluss_presentation/eval_spouse.R
new file mode 100644
index 0000000000000000000000000000000000000000..4312ba375d3c860e41c3e76a09fac064cfa2d10f
--- /dev/null
+++ b/docs/abschluss_presentation/eval_spouse.R
@@ -0,0 +1,75 @@
+library(ggplot2)
+
+## This script assumes the master script ran through, so that `articles`,
+## `results` and `data_dir` are available in the environment.
+
+## First off, get the spouses from all the infoboxes we scraped
+
+spouses <- sapply(articles$Text, function(x) {
+  info_box <- wikiproc:::get_infobox(x)
+  # get_infobox returns NA when there is no infobox; guard with is.data.frame()
+  # instead of calling is.na() on a whole data frame (length > 1 condition)
+  if (is.data.frame(info_box) && "Spouse(s)" %in% info_box$Desc) {
+    return(info_box[info_box$Desc == "Spouse(s)", ]$Content)
+  } else {
+    return(NA)
+  }
+})
+
+# Drop the names sapply attached to the result vector
+
+spouses <- unname(spouses)
+
+# Remove everything in parentheses and square brackets
+
+spouses <- gsub("\\s*\\([^\\)]+\\)", " ", spouses)
+spouses <- gsub("\\[.*?\\]", "", spouses)
+
+# We bind this onto the results data frame now to subset
+
+results_with_infobox <- results[, c("name", "spouse")]
+results_with_infobox$spouses_infobox <- spouses
+
+# Keep only the rows where we got a value in both columns
+
+results_with_both <- results_with_infobox[!is.na(results_with_infobox$spouse) & !is.na(results_with_infobox$spouses_infobox),]
+
+# Partial edit distance between the extracted spouse and the infobox value
+results_with_both$distance <- apply(results_with_both, 1, function(x) {
+  adist(x[2], x[3], partial = TRUE)[1, 1]
+})
+
+# Count pairs within edit distance 5 as correct extractions
+precision <- nrow(results_with_both[results_with_both$distance <= 5,]) / nrow(results_with_both)
+
+# Recall is a bit more difficult
+# First off, check for every article whether it even contains the lemma "marry"
+
+results_with_infobox$has_married <- apply(articles, 1, function(x) {
+  cleaned_text <- wikiproc::clean_html(x[4])
+  annotation <- wikiproc::create_annotations(cleaned_text, x[2], x[3], data.dir = data_dir)
+  tokens <- cleanNLP::cnlp_get_token(annotation)
+  if ("marry" %in% tokens$lemma) {
+    return(TRUE)
+  } else {
+    return(FALSE)
+  }
+})
+
+recall_high <- nrow(results_with_infobox[!is.na(results_with_infobox$spouse),]) / nrow(results_with_infobox[results_with_infobox$has_married,])
+
+# An alternative way to estimate recall
+
+# Get the ones we know were married and calculate recall only for those
+
+results_with_infobox <- results_with_infobox[!is.na(results_with_infobox$spouses_infobox),]
+
+recall_low <- nrow(results_with_infobox[!is.na(results_with_infobox$spouse),]) / nrow(results_with_infobox[results_with_infobox$has_married,])
+
+
+eval_res <- data.frame(Parameter = c("Precision", "Recall (Low Estimate)", "Recall (High Estimate)"),
+                       Value = c(precision, recall_low, recall_high),
+                       stringsAsFactors = FALSE)
+
+
+p <- ggplot(eval_res, aes(x = Parameter, y = Value))
+p <- p + geom_bar(position = "dodge", stat = "identity", fill = "blue", width = 0.5)
+p <- p + geom_text(aes(label = sprintf("%0.2f", round(Value, digits = 2))), vjust = 0, nudge_y = 0.01)
+p <- p + theme_minimal()
+p <- p + theme(axis.title.x = element_blank(), axis.title.y = element_blank())
+p
+
+ggsave("spouse_eval.png", path = "plots", width = 5, height = 4, units = "in")
diff --git a/docs/abschluss_presentation/kurchatov.png b/docs/abschluss_presentation/kurchatov.png
new file mode 100644
index 0000000000000000000000000000000000000000..7bf5bfb18e1852ac2b908e640d992502e823028c
Binary files /dev/null and b/docs/abschluss_presentation/kurchatov.png differ
diff --git a/docs/abschluss_presentation/plots.R b/docs/abschluss_presentation/plots.R
new file mode 100644
index 0000000000000000000000000000000000000000..24496c5d2bf64262fb16411bcac18ee87ab25021
--- /dev/null
+++ b/docs/abschluss_presentation/plots.R
@@ -0,0 +1,32 @@
+library(ggplot2)
+
+# This assumes that the master script ran through and the `results` data frame is available
+
+# Count non-NA values per column; note the use of `=` (not `<-`) inside
+# data.frame() so the columns actually get the names Count and Feature
+feature_count <- sapply(results, function(col) sum(!is.na(col)))
+feature_count <- data.frame(Count = feature_count,
+                            Feature = colnames(results),
+                            stringsAsFactors = FALSE)
+feature_count <- feature_count[feature_count$Count > 0 & feature_count$Feature != "name",]
+
+# Flipped coord barplot
+p <- ggplot(feature_count, aes(x = reorder(Feature, Count), y = Count, fill = Feature))
+p <- p + geom_bar(stat = "identity", width = 0.5)
+p <- p + geom_hline(yintercept = 983, colour = "red")
+p <- p + coord_flip()
+p <- p + guides(fill = FALSE)
+p <- p + theme_minimal()
+p <- p + labs(x = "Feature")
+p
+ggsave("feature_count_flip.png", path = "plots", width = 4, height = 4, units = "in")
+
+# Normal barplot
+p <- ggplot(feature_count, aes(x = reorder(Feature, -Count), y = Count, fill = Feature))
+p <- p + geom_bar(stat = "identity", width = 0.7)
+p <- p + geom_hline(yintercept = 983, colour = "red")
+p <- p + theme_minimal()
+p <- p + theme(axis.text.x=element_blank())
+p <- p + labs(x = "Feature")
+p
+ggsave("feature_count_normal.png", path = "plots", width = 5, height = 4, units = "in")
+
diff --git a/docs/abschluss_presentation/plots/feature_count_flip.png b/docs/abschluss_presentation/plots/feature_count_flip.png
new file mode 100644
index 0000000000000000000000000000000000000000..2cbcbbd3f3c0be108da5880bc31b6589a067a493
Binary files /dev/null and b/docs/abschluss_presentation/plots/feature_count_flip.png differ
diff --git a/docs/abschluss_presentation/plots/feature_count_normal.png b/docs/abschluss_presentation/plots/feature_count_normal.png
new file mode 100644
index 0000000000000000000000000000000000000000..6c5b044608dab8dfbe35ca0f25e9278a37d73091
Binary files /dev/null and b/docs/abschluss_presentation/plots/feature_count_normal.png differ
diff --git a/docs/abschluss_presentation/plots/precision_birthdate.png b/docs/abschluss_presentation/plots/precision_birthdate.png
new file mode 100644
index 0000000000000000000000000000000000000000..8c090e08374beae9983e50e01c99284309ebd5d5
Binary files /dev/null and b/docs/abschluss_presentation/plots/precision_birthdate.png differ
diff --git a/docs/abschluss_presentation/plots/rasa_architecture.png b/docs/abschluss_presentation/plots/rasa_architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e60bc8d8fe31539ca0b83a9b2db5aa88010de0f
Binary files /dev/null and b/docs/abschluss_presentation/plots/rasa_architecture.png differ
diff --git a/docs/abschluss_presentation/plots/spouse_eval.png b/docs/abschluss_presentation/plots/spouse_eval.png
new file mode 100644
index 0000000000000000000000000000000000000000..b5edd379f8e8c635810dbd7f95924bec3fc877d6
Binary files /dev/null and b/docs/abschluss_presentation/plots/spouse_eval.png differ
diff --git a/docs/abschluss_presentation/presentation.md b/docs/abschluss_presentation/presentation.md
new file mode 100644
index 0000000000000000000000000000000000000000..2370d98d69f1f4ac550e3c1aacd4731ae9fd103e
--- /dev/null
+++ b/docs/abschluss_presentation/presentation.md
@@ -0,0 +1,217 @@
+---
+title: Rasa Chatbot Demo
+separator: <!--s-->
+verticalSeparator: <!--v-->
+theme: solarized
+revealOptions:
+    transition: 'convex'
+    controls: true
+    slideNumber: true
+    
+---
+
+### Wikipedia Fact Chatbot
+
+David Fuhry, Lukas Gehrke, Leonard Haas, 
+
+Lucas Schons, Jonas Wolff
+
+<img src="chatbot.png" width="40%">
+
+<!--s-->
+
+### Contents
+
+* Topic & Research Question
+* Approach
+* Results
+
+<!--s-->
+
+### Topic - RASA Chatbot
+
+<section style="text-align: left;">
+RASA is a framework for building **chatbots** in the field of _Conversational AI_. The system must be filled with **facts** in order to generate answers.
+
+_"The Rasa Stack is a set of open source machine learning tools for developers to create contextual chatbots and assistants."_
+
+<!--s-->
+
+### Topic - RASA Architecture
+
+<img src="plots/rasa_architecture.png" width="70%">
+Note: Explain the diagram of the Rasa NLU and Core architecture
+
+<!--s-->
+
+### Research Question
+
+- _Can this knowledge be acquired from texts?_  
+- _Can this be done automatically with text sources?_  
+
+This is to be tested on the _Wikipedia entries of physicists_
+
+<!--s-->
+
+### Contents
+
+* ~~Topic & Research Question~~
+* Approach
+* Results
+
+<!--s-->
+
+### Approach - Configuring the RASA Bot
+
+* Training a bot requires _entities_, _text_ and _intents_
+* Definition of _intent_ from the [RASA Docs](https://rasa.com/docs/nlu/dataformat/):  
+_"The intent is the intent that should be associated with the text."_
+* Answers are given via _Custom Actions_, which require a **knowledge base**
+
+<!--s-->
+
+### Approach - Configuring the RASA Bot
+
+#### Defining intents for physicists:
+* awards
+* birthdate
+* birthplace
+* spouse
+* university
+* area of research
+* ...
+
+<!--s-->
+
+### Approach - Data Source
+
+* 982 English-language Wikipedia articles
+* Scraping each linked article with `wikipediR`
+* HTML formatting
+* Approx. 550 words per article
+<img src="kurchatov.png" width="70%">
+
+<!--s-->
+
+### Approach - Processing
+
+```
+├── packages.list
+├── script
+│   └── master.R
+└── wikiproc
+    ├── DESCRIPTION
+    ├── NAMESPACE
+    ├── R
+    │   ├── get_awards.R
+    │   ├── get_birthdate.R
+    │   ├── get_birthplace.R
+    │   ├── get_data.R
+    │   ├── get_spouse.R
+    │   └── get_university.R
+    ├── man
+    └── tests
+```
+Note: One R script per intent. The master script loads the articles, stores them in a data frame and, for each article, calls the clean_html script and the intent-processing scripts. The extracted information is stored in a new data frame. In addition, the .tsv for the bot is generated
+
+<!--s-->
+
+### Approach - Overall Architecture
+
+<img src="Wiki_Chatbot_Architecture.png" width="100%">
+Note: Insert diagram of the overall architecture, DEFINITELY with the Docker whale
+
+<!--s-->
+
+### Approach - Processing, Intent Extraction
+
+#### `R/get_awards.R`
+
+* Assumption: all awards in the text are tagged by spaCy
+* Match all entities of a text against a set of keywords.
+
+<!--s-->
+
+### Approach - Processing, Intent Extraction
+
+* Example: `R/get_spouse.R`
+* Identify sentences via the keyword _marry_ (lemma)
+* Apply _patterns_ to _POS tags_
+* Verify results via the physicist's name and _NER entities_
+
+<!--s-->
+
+### RASA Bot Demonstration
+
+Note: I see what you did there
+
+<!--s-->
+
+### Contents
+
+* ~~Topic & Research Question~~
+* ~~Approach~~
+* Results
+
+<!--s-->
+
+### Results
+
+##### Number of results obtained per intent; the rest is `NA`
+<img src="plots/feature_count_flip.png" width="50%">
+Note: fancy plots with precision and recall for awards, birthdate and spouse
+
+<!--s-->
+
+### Results
+
+<img src="plots/precision_birthdate.png" width="70%">
+Note: The evaluation was done manually on the first 300 results of get_birthdate.R, using the infobox values as ground truth (even though some of them are NA). Results were counted as a partial match if they are plausible dates and agree with the infobox reference except for a missing day (example: infobox: "3 May 1960"; get_birthdate: "May 1960"). Results were counted as a full match if they are plausible dates and are no less detailed than the infobox value (no missing day etc.)
+
+
+<!--s-->
+
+##### Evaluation of 'get_spouse.R'
+
+<img src="plots/spouse_eval.png" width="50%">
+Note: Recall could still be improved by integrating further patterns.
+
+<!--s-->
+
+### Evaluation of Software/Data Basis I
+
+#### 1. Rasa Bot
+* (+) NLU works very well in Rasa
+* (+) Freely configurable script (actions.py)
+* (-) RASA software is difficult to set up
+* (-) RASA was heavily restructured shortly before the project started
+Note: Tutorials and documentation became outdated overnight; no documentation in the examples, no examples in the documentation
+
+<!--s-->
+
+### Evaluation of Software/Data Basis II
+
+#### 2. Wikipedia Articles
+
+* (+) Relatively uniform structure (introduction, career, etc.)
+* (-) Varying levels of detail, data partly missing
+
+<!--s-->
+
+### Answering the Research Question
+
+* Defining the intents for the bot should happen beforehand  
+* Information for different intents varies in how hard it is to extract from text  
+Note: Would be possible, but not sensible, since intents are the basis for the Rasa architecture to work; the methods are partly generally applicable, partly domain-dependent
+
+<!--s-->
+
+### Results
+
+<section style="text-align: left;">
+_Can this knowledge be acquired from texts?_  
+_Can this be done automatically with text sources?_  
+
+It is possible to extract facts for _predefined intents_ from texts and to _make them available to the bot_.
+
+<!--s-->
diff --git a/docs/abschluss_presentation/presentation.pdf b/docs/abschluss_presentation/presentation.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..2176db3dbaf7808ff4c642d1c19cb4962bd6ee15
Binary files /dev/null and b/docs/abschluss_presentation/presentation.pdf differ
diff --git a/docs/abschluss_presentation/rasa_architecture.png b/docs/abschluss_presentation/rasa_architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e60bc8d8fe31539ca0b83a9b2db5aa88010de0f
Binary files /dev/null and b/docs/abschluss_presentation/rasa_architecture.png differ
diff --git a/docs/abschluss_presentation/speaker_notes.md b/docs/abschluss_presentation/speaker_notes.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e4e27722be883535495b2b59d30065cc9a3fbf1
--- /dev/null
+++ b/docs/abschluss_presentation/speaker_notes.md
@@ -0,0 +1,138 @@
+# Recap - Overview of Topic/Task
+
+Rasa is a Python framework for implementing chatbots in the field of Conversational AI ([definition](https://www.iotforall.com/what-is-conversational-ai/) of Conversational AI: a set of technologies that enable computers to simulate real conversations). Concretely, chatbots are meant to take on the role of always-available "conversation partners" that give information or instructions on a website. This brings several advantages: customers have a conversational service available around the clock, and companies that use chatbots report savings on service personnel.
+* How Rasa works:
+  - NLU understands the user’s message based on your previous training data:
+    - Intent classification: Interpreting meaning based on predefined intents (Example: Please send the confirmation to amy@example.com is a provide_email intent with 93% confidence)
+    - Entity extraction: Recognizing structured data (Example: amy@example.com is an email)
+  - Core decides what happens next in this conversation. Its machine learning-based dialogue management predicts the next best action based on the input from NLU, the conversation history and your training data. (Example: Core has a confidence of 87% that ask_primary_change is the next best action to confirm with the user if they want to change their primary contact information.)
+
+Core can also execute so-called [Custom Actions](https://rasa.com/docs/core/customactions/), which let the bot run arbitrary code via an endpoint.
+
+
+**Research question:** The bot needs _knowledge_ to react to recognized user intents.
+  * Can this knowledge be acquired from texts?
+  * Can this be done automatically with text sources?
+    (Example questions: Who was Albert Einstein? What did Albert Einstein invent? Where did he live?)
+
+
+# Approach
+
+### Prototype and Intents
+
+To answer the research question, we first set up a bot prototype. This required installing Rasa NLU and Rasa Core via pip or conda. Rasa also needs an environment with Python 3.6, for which miniconda was used. While configuring the bot it became apparent that, to train the NLU component, the entities and intents to be extracted from the user's messages have to be defined up front. For this reason the first intents the prototype was to be trained with were defined.
+```
+  - birth - "Where and when was $physicist born?"
+  - isAlive - "Is $physicist still alive?"
+  - education - "Where did $physicist go to school?"
+  - researchArea - "What did $physicist discover?"
+  - hasNobelPrize - "Did $physicist win the Nobel Prize?"
+
+```
+While using the bot it quickly became apparent that answering the questions would require a knowledge base of the form (physicist x, intent y).
+
+| `data`   | intent1 | intent2 | ... |
+|----------|---------|---------|-----|
+|physicist1| _data_  | _data_  | ... |
+|physicist2| _data_  | _data_  | ... |
+|   ...    |  ...    |  ...    | ... |
+
+### Data Acquisition
+
+Hence the chosen approach was _to acquire the necessary knowledge for a predetermined set of intents using text mining methods_. For the given selection of intents, Wikipedia articles were obtained, as required by the task description. For this, the article [List of physicists](https://en.wikipedia.org/wiki/List_of_physicists) was used, which at the time contained links to 982 articles on famous physicists. The scraping is performed by an R script that uses the R package 'WikipediR'. Afterwards, all articles are available as HTML in an R data frame:
+
+```
+#!/usr/bin/env Rscript
+...
+page <- xml2::read_html("https://en.wikipedia.org/wiki/List_of_physicists")
+...
+article <- WikipediR::page_content("en", "wikipedia", page_name = x, as_wikitext = FALSE)
+...
+```
+
+### Processing
+
+To generate the knowledge the bot needs from the collected Wikipedia articles, a dedicated R package was created: `processing/wikiproc`. The package first of all contains a script for removing HTML tags and other formatting from the text: `clean_html.R`. With this script, work can be done on _natural language text_, in keeping with the research field of text mining.
+Furthermore, the package contains _one script per intent_ to extract the desired knowledge from the articles with methods tailored to each intent; see the sketch below the package tree.
+
+```
+wikiproc
+├── DESCRIPTION
+├── NAMESPACE
+├── R
+│   ├── clean_html.R
+│   ├── get_awards.R
+│   ├── get_birthdate.R
+│   ├── get_birthplace.R
+│   ├── get_data.R
+│   ├── get_spouse.R
+│   └── get_university.R
+└── tests
+```
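+
+A minimal sketch of how these pieces fit together, based on the calls visible in `eval_spouse.R` (`article_html`, `article_name` and `article_id` are placeholder names for fields of the articles data frame; exact signatures may differ):
+
+```
+library(wikiproc)
+
+# Clean one article and annotate it; the annotation carries the
+# lemmata, POS tags and NER entities the extraction scripts work on.
+text       <- clean_html(article_html)
+annotation <- create_annotations(text, article_name, article_id,
+                                 data.dir = data_dir)
+tokens     <- cleanNLP::cnlp_get_token(annotation)
+```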
+
+### Processing - Examples
+
+`get_birthdate.R`:
+To obtain the birthdate from a text, all DATE entities are first extracted from the text using the 'cleanNLP' package. Of these, the first recognized entity is used.
+A death date possibly attached via '-' is then cut off with a regex, and parentheses and whitespace are removed.
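+
+A sketch of that logic (illustrative, not the actual `get_birthdate.R` code; the column names assume cleanNLP 2.x):
+
+```
+entities  <- cleanNLP::cnlp_get_entity(annotation)
+dates     <- entities[entities$entity_type == "DATE", ]$entity
+birthdate <- dates[1]                             # use the first DATE entity
+birthdate <- sub("\\s*-.*$", "", birthdate)       # cut off a death date after '-'
+birthdate <- trimws(gsub("[()]", "", birthdate))  # remove parentheses and whitespace
+```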
+
+`get_spouse.R`:
+The spouse is extracted from each text via NER and patterns. The patterns are vectors consisting of POS tags, the token 'marry', and wildcards marking the positions before or after it where the sought entity may appear. With these patterns and the set of all PERSON entities of the respective text, the pattern-matching function in 'utils.R' is called; a toy version is sketched below.
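+
+A toy version of the idea (the real matcher lives in 'utils.R' and is more general; this sketch only covers the `"* marry * *"` case on lemmata):
+
+```
+match_spouse <- function(lemmas, persons) {
+  hit <- which(lemmas == "marry")[1]    # position of the keyword
+  if (is.na(hit)) return(NA)
+  # tokens in the wildcard slots around "marry" are candidates
+  candidates <- c(if (hit > 1) lemmas[hit - 1],
+                  if (hit < length(lemmas)) lemmas[hit + 1])
+  # verify: keep only candidates the NER tagger marked as PERSON
+  candidates <- candidates[candidates %in% persons]
+  if (length(candidates) == 0) NA else candidates[1]
+}
+```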
+
+## Architecture
+
+Data extraction with the scripts from 'wikiproc' is driven by a central script, `master.R`,
+which loads the articles and builds a data frame in a single pass. From this data frame a .tsv file is finally created: `data.tsv`. This file is the knowledge base for the Rasa bot.
+Through custom actions, the Rasa bot can search `data.tsv` for a _recognized intent_ and a _recognized physicist entity_ and return the hit as its answer. The bot thereby becomes an expert on predefined topics about the physicists from the collected articles; a sketch of the final step follows below.
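+
+A sketch of that final step in `master.R` (the file name comes from the text; the other arguments are illustrative):
+
+```
+# Materialize the knowledge base consumed by the bot's custom actions;
+# `results` holds one row per physicist and one column per intent.
+write.table(results, file = "data.tsv", sep = "\t",
+            row.names = FALSE, quote = FALSE)
+```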
+
+![](Wiki_Chatbot_Architecture.png)
+
+# Results
+
+### Evaluation of the Results
+
+![Recall](plots/feature_count_normal.png)
+
+The intents covered achieve high precision but low recall.
+The pattern matching for 'spouse' in particular always returns the correct information. Nevertheless, in about 90% of cases no result is returned, because the necessary information is either _missing from the text_ or would have to be _extracted with a different pattern_, in this case e.g. via `"* husband * *", "* wife * *"`; only `"* marry * *"` is used.
+
+For other intents, which are extracted via NER and regular expressions, precision is lower, since they rest on relatively naive assumptions ("the first date is the birthdate"). Moreover, the quality of the extracted data is not very good - it is often very heterogeneous:
+```
+  [1] "October 3, 1921"                    "23 January 1840"                   
+  [3] "May 1960"                           "August 20, 1918"                   
+  [5] "June 25, 1928"                      "December 4, 1913"                  
+  [7] "November 30, 1939"                  "December 13, 1724"                 
+  [9] "1976"                               "March 28, 1964"                    
+ [11] "15 March 1930"                      "May 1908"           
+```
+
+```
+ [64] "Stalin Prize, National Prize"                                                                  
+ [65] "Nobel Price"
+ [66] NA                                                 
+ [67] NA
+ [68] NA
+ [69] "Nobel Price, Rumford Medal, Helmholtz Medal, Barnard Medal"
+ [70] "Nobel Price"                              
+ [71] NA                                             
+ [72] "Scientific American Trophy for, Volta Prize, The Volta Prize, Albert Medal, Edison Medal, Alexander Graham Bell Medal"        
+ [73] "Nobel Price"
+ [74] "Nobel Price, Special Breakthrough Prize in Fundamental Physics"
+ [75] NA
+ [76] NA
+```
+
+* Evaluation of the RASA software
+  - Setup is difficult
+  - No examples in the manuals, no documentation in the examples
+
+* Evaluation of Wikipedia articles as an unstructured data basis
+  - (+) relatively similar structure
+  - (-) still differ in detail (the "personal touch" of ever-changing authors)
+  - (-) varying levels of detail
+
+* Answering the research question
+> Can this knowledge be generated from texts?
+  - Yes, using patterns and NER
+> Can this knowledge be generated automatically?
+  - No. Intents should be defined beforehand; the knowledge can then be extracted with **methods tailored to the intent and the data basis**
\ No newline at end of file
diff --git a/docs/final-report/img/Wiki_Chatbot_Architecture.png b/docs/final-report/img/Wiki_Chatbot_Architecture.png
new file mode 100644
index 0000000000000000000000000000000000000000..6d191ed84cf5359ee17b1ffaa1ddb86a66a5737c
Binary files /dev/null and b/docs/final-report/img/Wiki_Chatbot_Architecture.png differ
diff --git a/docs/final-report/report.tex b/docs/final-report/report.tex
index aa7ba44dc1a019c655e98824a5631b8c3a60ba9f..582bd3c2c88121a5dde37398f12529c5ce5b522b 100644
--- a/docs/final-report/report.tex
+++ b/docs/final-report/report.tex
@@ -5,6 +5,8 @@
 \usepackage{ngerman}
 \usepackage[]{listings}
 \usepackage{hyperref}
+\usepackage{graphicx}
+\graphicspath{{./img/}}
 
 \title{Text Mining Lab \\ Training Rasa-Chatbots with Natural Language Texts \\ Project Report}
 \author{David Fuhry \\ Leonard Haas \\ Lukas Gehrke \\ Lucas Schons \\ Jonas Wolff}
@@ -18,59 +20,141 @@
 
 \pagebreak
 
-% To be edited - my (Lukas) suggestion so far
 \section{Project Description}
-    \subsection{Converstaional AI and Training}
-    Conversational AI describes computer systems that users can interact with by having a conversation. One important goal is to make the conversation seem as natural as possible. Ideally, an interacting user should assume to be interacting with another human beeing. This can make communication with a computer become very pleasant and easy for humaning beeings as they are simply using the language the always use. Besides there is no need for menu interaction with the system and thus no learning curve required.
-    % TODO add example use case (website information)
-    % TODO add more benefits (24/7 availability)
+
+    \subsection{Conversational AI and Training}
+    Conversational AI describes computer systems that users can interact with by having a
+    conversation. One important goal is to make the conversation seem as natural as possible.
+    Ideally, an interacting user should assume they are interacting with another human. This
+    makes communication with a computer pleasant and easy for humans, as they are simply
+    using their natural language. Besides, there is no need for menu interaction with the
+    system and thus no learning curve.
     \\ Conversational AI can be used in Voice Assistants that communicate through spoken words or
-    through chatbots that imitate a human beeing one is chatting with by text messages.
-    \subsection{Rasa Framwork}
-    Rasa is a collection of frameworks for conversational AI software. The Rasa Stack contains two open source libraries called Rasa NLU and Rasa Core that can be used to create contextual chatbots. Rasa NLU is a library for natural language understanding with intent classification and entity extraction Rasa Core is a Chatbot framework with machine learning based dialogue management. Both can be uses independently but rasa recommends using both.
-    % TODO add description of how a rasa bot must be trained to achieve results
+    through chatbots that imitate a human by sending text messages.
+
+    \subsection{Rasa Framework}
+    Rasa is a collection of tools for conversational AI software. The Rasa Stack contains two
+    open source libraries called Rasa NLU and Rasa Core that can be used to create contextual
+    chatbots. Rasa NLU is a library for natural language understanding with intent classification
+    and entity extraction. Rasa Core is a chatbot framework with machine learning based dialogue
+    management. Both can be used independently, but Rasa recommends using them together.
+    \\ A Rasa bot needs training data to work properly. The NLU component must be provided with example questions for each \textit{intent} it will have to deal with. Inside these questions, \textit{entities} must be marked in order to teach Rasa where to extract them from.
+    The Core component requires example conversation flows and utterance templates for training. Examples can be seen in section \ref{rasa_chatbot}.
 
     \subsection{Research Question}
-    The objective of this project is to find out, wether chatbots can be trained with natural language texts \textit{automatically}. There are two inital research questions: Given that chatbots need to be trained with knowledge, called facts.
+    The objective of this project is to find out whether chatbots can be trained with natural
+    language texts \textit{automatically}. Given that chatbots need to be trained with
+    knowledge, called facts, there are two initial research questions:
     \begin{itemize}
-        \item Can these facts be extracted from natural language text?
-        \item Can this be done automaitcally? 
+        \item can these facts be extracted from natural language text?
+        \item can this be done automatically? 
     \end{itemize}
     
-\section{Solution Approach}
+\section{Approach}
+
     \subsection{Project Goals}
     
 		\subsection{Rasa Setup and Intents}
 
-		The Rasa-Stack consists of two components: \textit{Rasa-Core} and \textit{Rasa-NLU}. The \textit{Rasa-NLU} component takes care of getting user input and matching it with the respective intents. It also extracts all possibly provided entities and stores them in variables, called ``slots''. After that, the \textit{Rasa-Core} component executes all actions associated with the determined intent.
+		The Rasa-Stack consists of two components: \textit{Rasa-Core} and \textit{Rasa-NLU}. The \textit{Rasa-NLU} component takes care of getting user input and matching it with the respective intents. It also extracts all possibly provided entities and stores them in variables, called ``slots''. After that, the \textit{Rasa-Core} component executes all actions associated with the determined intent. 
 
     \subsection{Scrapping of Source Texts}
+    Wikipedia was chosen as the source of texts as it provides relatively long texts in a somewhat uniform manner.
+    While Wikipedia does have a \textit{Physicists} category\footnote{\url{https://en.wikipedia.org/wiki/Category:Physicists}}, 
+    it is fragmented into somewhat arbitrary subcategories and thus not optimal to use as a collection.
+    However, Wikipedia also has a \textit{List of physicists} article, which contains 981 physicists and was used to build the collection. \\
+    Data scraping was done using the R package \textit{WikipediR}, a wrapper around the Wikipedia API.
+    Articles were downloaded as HTML\footnote{HTML was chosen over wikitext to ease text cleaning.} and afterwards stripped of all HTML tags and quotation marks.
+    
     \subsection{Fact Extraction Approaches}
+    Fact extraction varies greatly depending on the nature of the fact to extract.
+    As all approaches rely on some form of NER or POS tagging, annotations were created for all texts.
+    This was done using the R package \textit{cleanNLP} with a spaCy backend to create NER and POS tags as well as lemmata. \\
+    Fact extraction for physicists' spouses was done using pre-defined patterns on word lemmata.\footnote{Functionality to use patterns on POS tags is also available but did not yield better results.}
+    A pattern consists of word lemmata to be matched (including wildcards) as well as defined positions in which to look for the name of the physicist and of his/her spouse.
+    When a matching phrase is found, the result is verified by checking that the corresponding physicist is mentioned and that the potential spouse is detected as a person by the NER tagger.
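+
+    For illustration, a minimal sketch of such a pattern (the notation follows the
+    project's pattern strings; only the \texttt{marry} pattern is actually used):
+
+\begin{lstlisting}[caption={Sketch of a lemma pattern}]
+# "*" marks wildcard slots that are checked for the physicist's
+# and the potential spouse's name around the lemma "marry".
+patterns <- c("* marry * *")
+# conceivable further patterns: "* husband * *", "* wife * *"
+\end{lstlisting}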
 
 \section{Software Architecture}
-    \subsection{Rasa Chatbot}
-    The Rasa Chatbot built for this project uses both Rasa Stack components - \textit{Rasa Core} and \textit{Rasa NLU}. Configuration has been organised in reference to examples from the Rasa github repository. \\ Rasa NLU has been trained with example questions in Markdown format that cotain highlighted enities. This ensures the bot to understand intents and extract the entities inside the sentences. One example can be seen in Figure \ref{nlu_example}. \\
-    \lstinputlisting[label={nlu_example}, caption={NLU example}]{nlu_example.md} 
-    Rasa Core has been configured with \textit{stories} that contain example conversation flows as training data \ref{stories_example} and the \textit{domain} of the bot. The domain contains all actions, entities, slots, intents, and templates the bot deals with. \textit{Templates} means template strings for bot utterances. \textit{Slots} are variables that can hold different values. The bot proposed in this project uses a slot to store the name of a recognized physicist entity for instance. According to the Rasa website \footnote{\url{https://rasa.com/docs/get_started_step2/}}, the domain is \textit{the universe the bot is living in}. \\
+
+    \subsection{Rasa Chatbot} \label{rasa_chatbot}
+    The chatbot built for this project uses both Rasa Stack components - \textit{Rasa Core}
+    and \textit{Rasa NLU}. The configuration has been organized with reference to examples from the Rasa
+    GitHub repository. \\ Rasa NLU has been trained with example questions in Markdown format that
+    contain highlighted entities. This ensures that the bot is able to understand intents and
+    extract the entities inside the sentences. One example can be seen in listing \ref{nlu_example}. \\
+
+    \lstinputlisting[label={nlu_example}, caption={NLU example}]{nlu_example.md}
+
+    Rasa Core has been configured with \textit{stories} that contain example conversation flows as
+    training data (listing \ref{stories_example}) and the \textit{domain} of the bot. The domain
+    contains all actions, entities, slots, intents, and templates the bot deals with.
+    \textit{Templates} are template strings for bot utterances. \textit{Slots} are variables that can
+    hold different values. The bot proposed in this project uses a slot to store the name of a
+    recognized physicist entity. According to the Rasa
+    website\footnote{\url{https://rasa.com/docs/get_started_step2/}}, the domain is
+    \textit{the universe the bot is living in}. \\
+
     \lstinputlisting[label={stories_example}, caption={Example Story}]{stories_example.md}
-    The bot recognizes the intents shown in Figure \ref{intent_plot}. It can be started through \textit{MAKE}-commands. For further details, please refer to the README \footnote{\url{https://git.informatik.uni-leipzig.de/text-mining-chatbot/wiki-rasa/blob/master/README.md}}. Development of the bot was focused on proof of concept so there is not a lot of natural conversation ability available.
-    % TODO complete table.
+
+    The bot recognizes the intents shown in table \ref{table:intent_table}. It can be started
+    through \textit{MAKE}-commands. For further details, please refer to the
+    README\footnote{
+    \url{https://git.informatik.uni-leipzig.de/text-mining-chatbot/wiki-rasa/blob/master/README.md}}.
+
+    Development of the bot was focused on proof of concept, so there is not a lot of natural
+    conversation ability available.
 
     \begin{center}
-        \begin{tabular}{| l | l | l |}
-            \hline
-            No & Intent & Example \\ \hline
-            1 & birthdate & When was Albert Einstein born \\ \hline
-            1 & birthdate & When was Albert Einstein born \\ \hline
-            1 & birthdate & When was Albert Einstein born \\ \hline
-            1 & birthdate & When was Albert Einstein born \\
-            \hline
-        \end{tabular}
+        \begin{table}
+            \begin{tabular}{| c | l | l |}
+                \hline
+                No & Intent & Example \\ \hline
+                1 & birthdate & When was Albert Einstein born \\ \hline
+                2 & nationality & Where was Albert Einstein born \\ \hline
+                3 & day of death & When did Albert Einstein die \\ \hline
+                4 & place of death & Where did Albert Einstein die \\ \hline
+                5 & is alive & Is Albert Einstein still alive \\ \hline
+                6 & spouse & Who was Albert Einstein married to \\ \hline
+                7 & primary education & Where did Albert Einstein go to school \\ \hline
+                8 & university & Which university did Albert Einstein attend \\ \hline
+                9 & area of research & What was Albert Einstein area of research \\ \hline
+                10 & workplace & Where did Albert Einstein work \\ \hline
+                11 & awards & What awards did Albert Einstein win \\ \hline
+            \end{tabular}
+            \caption{Intents that are recognized by the bot}
+            \label{table:intent_table}
+        \end{table}
     \end{center}
 
     \subsection{R Package 'wikiproc'}
+    All functionality to extract facts and download data from Wikipedia, as well as some utility functions, 
+    is encapsulated inside the \textit{wikiproc} R package. 
+    This allows for better management of dependencies as well as the inclusion of unit tests for the fact extraction methods.
+    
+
+    \begin{table}
+        \centering
+        \begin{tabular}{| l | l |}
+            \hline
+            Function & Category \\ \hline \hline
+            clean\textunderscore html & Utility \\ \hline
+            create\textunderscore annotations & Utility \\ \hline
+            init\textunderscore nlp & Utility \\ \hline
+            get\textunderscore data & Data scraping \\ \hline
+            get\textunderscore awards & Fact extraction \\ \hline
+            get\textunderscore birthdate & Fact extraction \\ \hline
+            get\textunderscore birthplace & Fact extraction \\ \hline
+            get\textunderscore spouse & Fact extraction \\ \hline
+            get\textunderscore university & Fact extraction \\ \hline
+        \end{tabular}
+        \caption{Exported functions of the wikiproc package}
+        \label{table:wikiproc_functions}
+    \end{table}
+
     \subsection{Interworking of R and Rasa}
-    %TODO add architecture chart made by Lucas
+
+    The diagram below shows how the R processing pipeline and the Rasa bot interact:
+    \textit{master.R} builds the knowledge base \textit{data.tsv}, which the bot
+    queries through its custom actions.
+
+    \includegraphics[width=\textwidth]{Wiki_Chatbot_Architecture}
 
 
 \section{Results}
diff --git a/docs/img/result_data_frame.png b/docs/img/result_data_frame.png
new file mode 100644
index 0000000000000000000000000000000000000000..5982b7b79ac801b3f93ee18994dff333a6995bc0
Binary files /dev/null and b/docs/img/result_data_frame.png differ
diff --git a/docs/img/wiki-rasa-proj-struct.png b/docs/img/wiki-rasa-proj-struct.png
new file mode 100644
index 0000000000000000000000000000000000000000..642d6d56a1bd038ac2071f3c9c5ff02888f591fd
Binary files /dev/null and b/docs/img/wiki-rasa-proj-struct.png differ