Commit 4024f970 authored by Robert Sachunsky
Browse files

workflow: use wolf binarization, fix whitelisting

parent c5b7ddbb
......@@ -102,15 +102,17 @@ $(RECOGNIZED): OPTIONS = -P model deu \
-P textequiv_level glyph \
-P xpath_parameters \
'{ "contains(@custom,\"zeitraum\")": \
{ "char_whitelist": "0123456789.,-" }, \
{ "tessedit_char_whitelist": "0123456789.,- " }, \
"contains(@custom,\"kost\") and not(contains(@custom,\"anteil\"))": \
{ "char_whitelist": "0123456789.," }, \
"contains(@custom,\"einheiten\") or contains(@custom,\"temperatur\")": \
{ "char_whitelist": "0123456789." }, \
{ "tessedit_char_whitelist": "0123456789., €" }, \
"contains(@custom,\"einheiten\") or contains(@custom,\"_flaeche\") or contains(@custom,\"_verbrauch\") and not(contains(@custom,\"_einheit\"))": \
{ "tessedit_char_whitelist": "0123456789.," }, \
"contains(@custom,\"temperatur\")": \
{ "tessedit_char_whitelist": "0123456789" }, \
"contains(@custom,\"anteil\")": \
{ "char_whitelist": "0123456789%" }, \
{ "tessedit_char_whitelist": "0123456789%" }, \
"contains(@custom,\"_einheit\")": \
{ "char_whitelist": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz23²³." } \
{ "tessedit_char_whitelist": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz23²³." } \
}'
.DEFAULT_GOAL = $(RECOGNIZED)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment