Commit c911975c authored by Dominik Schwabe's avatar Dominik Schwabe
Browse files

reworked markup working

parent d9e3f15a
......@@ -112,7 +112,7 @@ router.post(
async (req, res, next) => {
try {
const { summarizers, text, ratio } = req.body;
const original = isURL(text)
let original = isURL(text)
? await articleDownloader.download(text)
: text;
let summariesText = await summarize(summarizers, original, ratio);
......@@ -120,6 +120,7 @@ router.post(
for (const [metric, result] of Object.entries(summariesText)) {
summaries[metric] = await sentenceSplitter.split(result);
}
original = await sentenceSplitter.split(original)
return res.json({ original, summaries });
} catch (err) {
return next(err);
......
import React from "react";
import React, { useState } from "react";
import { usePagination } from "../hooks/pagination";
import { Markup } from "./Markup";
import { Markup } from "./utils/Markup";
import { Pagination } from "./utils/Pagination";
const MarkupEntry = ({ row, hypothesis, reference }) => {
const markupState = useState();
return (
<tr>
<td>{row}</td>
<td>
<Markup markups={reference} markupState={markupState} />
</td>
<td>
<Markup markups={hypothesis} markupState={markupState} />
</td>
</tr>
);
};
const ComparisonDisplay = ({ page, size, comparisons }) => (
<table className="uk-table uk-table-divider uk-table-small uk-table-middle">
<thead>
......@@ -14,16 +29,8 @@ const ComparisonDisplay = ({ page, size, comparisons }) => (
</tr>
</thead>
<tbody>
{comparisons.slice((page - 1) * size, page * size).map(([index, hyp, ref]) => (
<tr key={index}>
<td>{index}</td>
<td>
<Markup markupedText={ref} />
</td>
<td>
<Markup markupedText={hyp} />
</td>
</tr>
{comparisons.slice((page - 1) * size, page * size).map(([index, hypothesis, reference]) => (
<MarkupEntry key={index} row={index} hypothesis={hypothesis} reference={reference} />
))}
</tbody>
</table>
......
import React from "react";
const Markup = ({ markupedText, showMarkup = true }) => (
<>
{markupedText.map(([text, classNames], i) => (
<span key={i} className={showMarkup ? classNames.join(" ") : ""}>
{text}
</span>
))}
</>
);
export { Markup };
......@@ -3,9 +3,9 @@ import React, { useContext, useMemo, useState } from "react";
import { evaluateRequest } from "../api";
import { MetricsContext } from "../contexts/MetricsContext";
import { flatten } from "../utils/flatScores";
import { markup } from "../utils/fragcolors";
import { computeMarkup } from "../utils/markup";
import { displayMessage } from "../utils/message";
import { Markup } from "./Markup";
import { Markup } from "./utils/Markup";
import { ScoreTable } from "./ScoreTable";
import { Button } from "./utils/Button";
import { InfoText } from "./utils/InfoText";
......@@ -23,10 +23,10 @@ const OneHypRefResult = ({ className, calculation }) => {
<tbody>
<tr>
<td>
<Markup markupedText={hypothesis} />
<Markup markups={hypothesis} />
</td>
<td>
<Markup markupedText={reference} />
<Markup markups={reference} />
</td>
</tr>
</tbody>
......@@ -67,7 +67,7 @@ const OneHypRef = () => {
setIsComputing(true);
evaluateRequest(getChosenMetrics(settings), [hypText], [refText])
.then(({ scores }) => {
const [hypothesis, reference] = markup(hypText, refText);
const [hypothesis, reference] = computeMarkup(hypText, refText);
setEvaluateResult({ scores, hypothesis, reference });
})
.catch((e) => displayMessage(e))
......
......@@ -2,12 +2,12 @@ import React, { useContext, useMemo } from "react";
import { MetricsContext } from "../contexts/MetricsContext";
import { flatten } from "../utils/flatScores";
import { markup } from "../utils/fragcolors";
import { computeMarkup } from "../utils/markup";
import { CompareTable } from "./CompareTable";
import { ScoreTable } from "./ScoreTable";
const ResultInfo = ({ scores, hypotheses, references }) => {
const comparisons = useMemo(() => hypotheses.map((hyp, i) => markup(hyp, references[i])), [
const comparisons = useMemo(() => hypotheses.map((hyp, i) => computeMarkup(hyp, references[i])), [
hypotheses,
references,
]);
......
......@@ -2,7 +2,7 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from "react"
import UIkit from "uikit";
import { flatten } from "../utils/flatScores";
import { markup } from "../utils/fragcolors";
import { computeMarkup } from "../utils/markup";
import { CompareTable } from "./CompareTable";
import { ScoreTable } from "./ScoreTable";
import { DeleteButton } from "./utils/DeleteButton";
......@@ -18,7 +18,7 @@ const SavedInfo = ({ index, ID, getCalculationScores, getCalculationLines, delet
if (loadRef.current && loadRef.current.className.includes("uk-active")) {
const { hypotheses, references } = getCalculationLines(ID);
setComparisons(
hypotheses.map((hyp, i) => markup(hyp, references[i])),
hypotheses.map((hyp, i) => computeMarkup(hyp, references[i])),
[hypotheses, references]
);
} else UIkit.util.once(document, "show", `#${toggleID}`, showEvent);
......
This diff is collapsed.
......@@ -5,9 +5,9 @@ import { useKeycode } from "../hooks/keycode";
import { useList } from "../hooks/list";
import { usePagination } from "../hooks/pagination";
import { useSavedVisualizations } from "../hooks/savedVisualizations";
import { markup } from "../utils/fragcolors";
import { computeMarkup } from "../utils/markup";
import { displayMessage } from "../utils/message";
import { Markup } from "./Markup";
import { Markup } from "./utils/Markup";
import { Accordion, AccordionItem } from "./utils/Accordion";
import { Button } from "./utils/Button";
import { Card, CardBody, CardHeader, CardTitle } from "./utils/Card";
......@@ -23,9 +23,7 @@ const ModelModal = ({ isOpen, setIsOpen, models, addModel, otherLines, forceSame
const [fileName, setFile, lines] = useFile(null);
const linesAreSame = sameLength([lines, ...otherLines]);
const close = () => {
setIsOpen(false);
};
const close = () => setIsOpen(false);
const modelIsValid = () => !Object.values(models).some((model) => model.name === name);
const accept = () => {
if (!name) {
......@@ -252,9 +250,9 @@ const useMarkup = (doc, reference, models) => {
case null:
return [];
case "reference":
return markup(doc, reference);
return computeMarkup(doc, reference);
default:
return markup(doc, models[slot][1]);
return computeMarkup(doc, models[slot][1]);
}
}, [slot, doc, reference, models]);
return [docMarkup, refMarkup, slot, toggleSlot];
......@@ -273,7 +271,7 @@ const VisualizeContent = ({ doc, reference, models }) => {
<CardHeader>
<CardTitle>Document</CardTitle>
</CardHeader>
<CardBody>{docMarkup ? <Markup markupedText={docMarkup} /> : doc}</CardBody>
<CardBody>{docMarkup ? <Markup markups={docMarkup} /> : doc}</CardBody>
</Card>
</div>
<div>
......@@ -290,7 +288,7 @@ const VisualizeContent = ({ doc, reference, models }) => {
</div>
</CardTitle>
</CardHeader>
<CardBody>{referenceSelected ? <Markup markupedText={refMarkup} /> : reference}</CardBody>
<CardBody>{referenceSelected ? <Markup markups={refMarkup} /> : reference}</CardBody>
</Card>
{models.map(([name, modelLine], i) => {
const modelSelected = slot === i;
......@@ -309,7 +307,7 @@ const VisualizeContent = ({ doc, reference, models }) => {
</div>
</CardTitle>
</CardHeader>
<CardBody>{modelSelected ? <Markup markupedText={refMarkup} /> : modelLine}</CardBody>
<CardBody>{modelSelected ? <Markup markups={refMarkup} /> : modelLine}</CardBody>
</Card>
);
})}
......
import React, { useMemo, useState } from "react";
import { computeMarkup } from "../../utils/markup";
/**
 * Hook returning `computeMarkup(text, sum)`, memoized on both arguments so
 * the markup is only recomputed when `text` or `sum` changes.
 */
const useMarkup = (text, sum) => {
  const recompute = () => computeMarkup(text, sum);
  return useMemo(recompute, [text, sum]);
};
// Style of a span whose tag is currently hovered when its own colouring is off.
// NOTE(review): the original declared `display: "relative"`. "relative" is not a
// valid value for `display` (it is a `position` keyword), so the declaration was
// ignored by the browser. It is spelled as `position` here — confirm that
// this matches the intended rendering.
const innerHoverStyle = { background: "yellow", color: "black", position: "relative" };
// Box styling shared by every coloured span.
const baseMarkupStyle = { padding: "2px", borderRadius: "0px" };
// Hovered style for a span that also shows its colouring: base box + hover colours.
const outerHoverStyle = { ...baseMarkupStyle, ...innerHoverStyle };
const TaggedMarkup = ({ markup, markupState, showMarkup }) => {
let props = {};
let style = {};
const [content, tag, bgcolor, fgcolor] = markup;
if (showMarkup) style = { ...baseMarkupStyle, background: bgcolor, color: fgcolor };
if (markupState) {
const [currMarkup, setCurrMarkup] = markupState;
if (tag === currMarkup) style = showMarkup ? outerHoverStyle : innerHoverStyle;
const onMouseEnter = showMarkup ? () => setCurrMarkup(tag) : null;
const onMouseLeave = showMarkup ? () => setCurrMarkup(null) : null;
props = { onMouseEnter, onMouseLeave };
}
return (
<span {...props} style={style}>
<Markup markups={content} markupState={markupState} showMarkup={false} />
</span>
);
};
const Markup = ({ markups, markupState, showMarkup = true }) => (
<>
{markups.map((child, i) =>
typeof child === "string" ? (
<span key={i}>{child}</span>
) : (
<TaggedMarkup key={i} markup={child} markupState={markupState} showMarkup={showMarkup} />
)
)}{" "}
</>
);
export { Markup, useMarkup };
import React from "react";
import { Markup } from "../Markup";
const MarkupDisplayer = ({ paragraphedText, name, showMarkup, onHighlight }) => (
<div
className="uk-card uk-card-default uk-card-body uk-card-small uk-margin"
style={{ border: "1px", borderColor: "grey", borderStyle: "solid" }}
>
<h1 className="uk-card-title uk-text-capitalize uk-flex uk-flex-between">
{name}
<button className="uk-button-primary" onClick={onHighlight}>
{showMarkup ? "hide highlighting" : "show highlighting"}
</button>
</h1>
{paragraphedText.map((markupedText, i) => (
<p key={i}>
<Markup markupedText={markupedText} showMarkup={showMarkup} />
</p>
))}
</div>
);
export { MarkupDisplayer };
/* eslint-disable */
/* Copyright 2012 Marcusb @ Vroniplag Wiki.
Licensed under GNU General Public License v3 or later.
Modified by Dominik Schwabe @ University Leipzig 2020 */
/* Requires no jquery. */
// http://www.dweebd.com/javascript/binary-search-an-array-in-javascript/
/**
 * Bisect a sorted list.
 *
 * @param {Array} list - elements ordered consistently with `comparator`
 * @param {*} find - value handed to `comparator` as its second argument
 * @param {Function} comparator - `(element, find)`; negative when the element
 *   sorts before `find`, positive when it sorts after, anything else is a hit
 * @returns {number} index of a matching element, or -1 when none matches
 */
const binarySearch = (list, find, comparator) => {
  let lower = 0;
  let upper = list.length - 1;
  while (lower <= upper) {
    const middle = Math.floor((lower + upper) / 2);
    const order = comparator(list[middle], find);
    if (order < 0) {
      lower = middle + 1;
      continue;
    }
    if (order > 0) {
      upper = middle - 1;
      continue;
    }
    return middle;
  }
  return -1;
};
/* Text comparison. */
/**
 * Find token runs shared between documents.
 *
 * @param {string[][]} documents - array of token lists; tokens are strings
 *   that must not contain the special "\x01" character (it is used to join a
 *   run into a lookup key)
 * @param {number} min_run_length - number of consecutive tokens a run must
 *   have to be considered a match
 * @returns {Array} list of matches `[doc_1, start_1, doc_2, start_2, length]`
 *   where doc_X are indices into `documents` and start_X are indices into the
 *   respective token list. doc_1 is the document being scanned, doc_2 an
 *   earlier document.
 */
const cmp_text = (documents, min_run_length) => {
  const no_self_similarities = true;
  const final_match_list = [];
  /* For each min_run_length token run seen so far we store a list of
     [doc, start]. A Map is used instead of a plain object: with a plain
     object, `tag in table` is also true for inherited keys such as
     "constructor" or "__proto__", which made such runs crash. */
  const match_table = new Map();
  const documents_len = [];
  for (let doc_idx = 0; doc_idx < documents.length; doc_idx++) {
    const doc = documents[doc_idx];
    const doc_len = doc.length;
    /* Number of positions at which a full run can start. */
    const tokens = doc_len - min_run_length + 1;
    /* Record the length of each document. */
    documents_len[doc_idx] = doc_len;
    if (tokens <= 0) {
      /* Document is not long enough to have any matches. */
      continue;
    }
    /* We don't report another match until we have skipped over
       all the tokens in the last match. */
    let min_token_idx = 0;
    for (let token_idx = 0; token_idx < tokens; token_idx++) {
      const match_tag = doc.slice(token_idx, token_idx + min_run_length).join("\x01");
      const match_loc = [doc_idx, token_idx];
      const matches = match_table.get(match_tag);
      if (matches === undefined) {
        match_table.set(match_tag, [match_loc]);
        continue;
      }
      if (token_idx >= min_token_idx) {
        /* If there are matches, find the best (longest) and record it. */
        const best_match = [doc_idx, token_idx, null, 0, 0];
        for (const [peer_doc_idx, peer_start] of matches) {
          if (no_self_similarities && peer_doc_idx === doc_idx) {
            /* Self similarity, skip for now. FIXME: Make this an option.
               Note: If we allow self-similarities, there can be overlapping
               matches like in: "a b c d a b c d a b c d" which has matches
               "[1: a b c d [2: a b c d :1] a b c d :2], which is a coloring
               problem. */
            continue;
          }
          const peer_doc = documents[peer_doc_idx];
          const peer_len = documents_len[peer_doc_idx];
          let peer_token_idx = peer_start + min_run_length;
          let our_token_idx = token_idx + min_run_length;
          /* Extend the run for as long as both documents keep agreeing. */
          while (
            peer_token_idx < peer_len &&
            our_token_idx < doc_len &&
            peer_doc[peer_token_idx] === doc[our_token_idx]
          ) {
            peer_token_idx++;
            our_token_idx++;
          }
          const len = our_token_idx - token_idx;
          if (len > best_match[4]) {
            /* We found a better match. */
            best_match[2] = peer_doc_idx;
            best_match[3] = peer_start;
            best_match[4] = len;
          }
        }
        /* Any good match found? Record it. */
        if (best_match[2] !== null) {
          final_match_list.push(best_match);
          min_token_idx = token_idx + best_match[4];
        }
      }
      /* In any case, we keep this location as a possible future match. */
      matches.push(match_loc);
    }
  }
  return final_match_list;
};
/**
 * Split text into `[word, separator]` pairs.
 *
 * A separator is a run of whitespace and/or hyphens. Before splitting,
 * detached contractions are glued back to the preceding word
 * (" n't" -> "n't", " 's" -> "'s", " 'm" -> "'m").
 *
 * @param {string} text
 * @returns {Array} list of `[word, separator]`; `word` is null for a leading
 *   separator, `separator` is null for a trailing word. Empty text yields [].
 */
const wordspaceTokens = (text) => {
  const words = [];
  /* String.prototype.match returns null (not []) when nothing matches,
     which is the case for empty text. */
  const tokens =
    text
      .replace(/ ?n['`’]t/g, "n't")
      .replace(/ ?['`’']s/g, "'s")
      .replace(/ ?['`’']m/g, "'m")
      .match(/[^\s-]+|[\s-]+/g) || [];
  if (tokens.length === 0) {
    return words;
  }
  let i = 0;
  /* Tokens alternate word/separator; a leading separator has no word. */
  if (tokens[0].match(/[\s-]/)) {
    words.push([null, tokens[0]]);
    i = 1;
  }
  for (const last = tokens.length - 1; i < last; i += 2) {
    words.push([tokens[i], tokens[i + 1]]);
  }
  /* A trailing word has no separator. */
  if (i === tokens.length - 1) {
    words.push([tokens[i], null]);
  }
  return words;
};
/**
 * A tokenized text together with the CSS classes applied to ranges of it.
 *
 * Fields:
 *   wstokens - result of `wordspaceTokens(text)`: `[word, separator]` pairs
 *   words    - `[word, position]` per token; word may be null (leading
 *              separator)
 *   markups  - `[start, end, classlist]` ranges with inclusive positions,
 *              kept sorted and free of gaps (classlist is empty for an
 *              unused range); empty when the text has no tokens
 */
class textblock {
  constructor(text) {
    this.wstokens = wordspaceTokens(text);
    /* words is an array of [word, position] where position is a
       scalar. word may be null (indicating a leading whitespace)! */
    this.words = this.wstokens.map((wordspace, i) => [wordspace[0], i]);
    /* Each element in markups is [start, end, classlist], where start
       and end are inclusive positions and classlist is an array of
       class names to apply. To make the clipping a lot easier, we
       keep the list sorted and free of gaps (classlist is empty for
       an unused range). A block without tokens has no range at all;
       [0, -1] would make markup() index wstokens[-1]. */
    const lastPos = this.words.length - 1;
    this.markups = lastPos >= 0 ? [[0, lastPos, []]] : [];
  }

  /* Split the range containing pos so that a range starts exactly at pos. */
  _cut_before = (pos) => {
    const markups = this.markups;
    /* Find the range that contains pos. */
    const loc = binarySearch(markups, pos, (range, target) => {
      if (range[1] < target) return -1;
      if (range[0] > target) return 1;
      return 0;
    });
    if (loc < 0) {
      /* No range contains pos: this is the case when cutting just past
         the last position (apply_class cuts at endpos + 1). */
      return;
    }
    const range = markups[loc];
    if (pos === range[0]) return;
    const end = range[1];
    range[1] = pos - 1;
    /* The new tail range inherits a copy of the class list. */
    markups.splice(loc + 1, 0, [pos, end, range[2].slice()]);
  };

  /* Mark the inclusive region startpos..endpos with a CSS class. */
  apply_class = (cssclass, startpos, endpos) => {
    /* Cutting the existing regions at the positions we want to
       apply a class makes the following algorithm a lot easier,
       because existing regions are then either completely
       contained in the new range or completely outside. Because
       we start with the full range, all ranges exist, so no gaps
       need to be filled. */
    if (endpos === startpos) {
      // skip single word if stopword
      const w = this.words[endpos][0].toLowerCase();
      if (
        w.match(
          /\b(a|about|above|after|again|against|all|am|an|and|any|are|as|at|be|because|been|before|being|below|between|both|but|by|can|did|do|does|doing|don|down|during|each|few|for|from|further|had|has|have|having|he|her|here|hers|herself|him|himself|his|how|i|if|in|into|is|it|its|itself|just|me|more|most|my|myself|no|nor|not|now|of|off|on|once|only|or|other|our|ours|ourselves|out|over|own|s|same|she|should|so|some|such|t|than|that|the|their|theirs|them|themselves|then|there|these|they|this|those|through|to|too|under|until|up|very|was|we|were|what|when|where|which|while|who|whom|why|will|with|you|your|yours|yourself|yourselves)\b/
        )
      ) {
        return;
      }
    }
    this._cut_before(startpos);
    this._cut_before(endpos + 1);
    /* Now every range is either inside or outside startpos..endpos;
       add the class to the inside ones if it is missing. */
    for (const [mstart, mend, classlist] of this.markups) {
      if (startpos <= mstart && mend <= endpos && !classlist.includes(cssclass)) {
        classlist.push(cssclass);
      }
    }
  };

  /* Produce a list of [text, classlist]; the separator that follows a
     range is emitted as its own entry with an empty class list. */
  markup = () => {
    const wstokens = this.wstokens;
    const markupedText = [];
    for (const [jstart, jend, classlist] of this.markups) {
      let substr = "";
      for (let j = jstart; j <= jend; j++) {
        const [word, whitespace] = wstokens[j];
        if (word !== null) substr += word;
        if (j !== jend) substr += whitespace;
      }
      markupedText.push([substr, classlist]);
      /* Add last whitespace. */
      const lastWhitespace = wstokens[jend][1];
      if (lastWhitespace !== null) markupedText.push([lastWhitespace, []]);
    }
    return markupedText;
  };
}
/* Extract a cleaned up list. */
/**
 * Normalize a `[token, id]` list for comparison.
 *
 * Null tokens are dropped. Every "ß" is transliterated to "ss", all
 * characters outside the allowed set (ASCII letters and digits, German
 * umlauts, U+0410..U+044F, hyphen) are removed, and the result is
 * lower-cased. Tokens that end up empty are dropped together with their id.
 *
 * @param {Array} tokenidlist - list of `[token, id]`; token may be null
 * @returns {Array} `[tokens, ids]`, two parallel arrays
 */
const clean_list = (tokenidlist) => {
  const tokens = [];
  const ids = [];
  for (const [rawToken, id] of tokenidlist) {
    if (rawToken === null) {
      continue;
    }
    const token = rawToken
      /* A string pattern would only replace the first "ß"; the remaining
         ones would then be stripped by the filter below. */
      .replace(/ß/g, "ss")
      .replace(/[^a-zäöüA-ZÄÖÜ0-9\u0410-\u044F-]/g, "")
      .toLowerCase();
    if (token !== "") {
      tokens.push(token);
      ids.push(id);
    }
  }
  return [tokens, ids];
};
const markup = (hyp, ref) => {
const refTextblock = new textblock(ref);
const hypTextblock = new textblock(hyp);
const [hypDoc, hypIds] = clean_list(hypTextblock.words);
const [refDoc, refIds] = clean_list(refTextblock.words);
const textblocks = [refTextblock, hypTextblock];
const docs = [refDoc, hypDoc];
const ids = [refIds, hypIds];
const len = 3;
const sims = cmp_text(docs, len);
const nr_col = 9;
let col = 0;
for (const [doc_1, start_1, doc_2, start_2, length] of sims) {
textblocks[doc_1].apply_class(
"fragmark" + (col + 1),
ids[doc_1][start_1],
ids[doc_1][start_1 + length - 1]
);
textblocks[doc_2].apply_class(
"fragmark" + (col + 1),
ids[doc_2][start_2],
ids[doc_2][start_2 + length - 1]
);
col = (col + 1) % nr_col;
}