From 565f99ccede67de83edd718059eb48c57bce1c5a Mon Sep 17 00:00:00 2001 From: Akash Mozumdar Date: Wed, 26 Feb 2020 00:59:47 -0700 Subject: [PATCH] implement deinflection --- extensions/extrawindow.cpp | 75 +++++++++++++++++++++++++++++++------- extensions/replacer.cpp | 6 +-- text.cpp | 14 +++++-- 3 files changed, 74 insertions(+), 21 deletions(-) diff --git a/extensions/extrawindow.cpp b/extensions/extrawindow.cpp index d09c605..5c92ca6 100644 --- a/extensions/extrawindow.cpp +++ b/extensions/extrawindow.cpp @@ -2,9 +2,11 @@ #include "extension.h" #include "ui_extrawindow.h" #include "defs.h" +#include "util.h" #include "blockmarkup.h" #include #include +#include #include #include #include @@ -260,13 +262,8 @@ private: if (ui.display->text().mid(i) == dictionaryWindow.term) return dictionaryWindow.ShowDefinition(); dictionaryWindow.ui.display->setFixedWidth(ui.display->width() * 3 / 4); dictionaryWindow.setTerm(ui.display->text().mid(i)); - int home = i == 0 ? 0 : textPositionMap[i - 1].x(), away = textPositionMap[i].x(), x = 0; - if (textPositionMap[i].x() > ui.display->width() / 2) - { - std::swap(home, away); - x -= dictionaryWindow.width(); - } - x += (home * 3 + away) / 4; + int left = i == 0 ? 0 : textPositionMap[i - 1].x(), right = textPositionMap[i].x(), + x = textPositionMap[i].x() > ui.display->width() / 2 ? -dictionaryWindow.width() + (right * 3 + left) / 4 : (left * 3 + right) / 4; dictionaryWindow.move(ui.display->mapToGlobal(QPoint(x, textPositionMap[i].y()))); } @@ -354,7 +351,7 @@ private: BlockMarkupIterator savedDictionary(stream, Array{ "|TERM|", "|DEFINITION|" }); while (auto read = savedDictionary.Next()) { - const auto& [terms, definition] = *read; + const auto& [terms, definition] = read.value(); auto storedDefinition = StoreCopy(definition); std::string_view termsView = terms; size_t start = 0, end = termsView.find("|TERM|"); @@ -366,7 +363,20 @@ private: } dictionary.push_back(DictionaryEntry{ StoreCopy(termsView.substr(start)), storedDefinition }); } - std::sort(dictionary.begin(), dictionary.end()); + std::stable_sort(dictionary.begin(), dictionary.end()); + + inflections.clear(); + stream.seekg(0); + BlockMarkupIterator savedInflections(stream, Array{ "|ROOT|", "|INFLECTS TO|", "|NAME|" }); + while (auto read = savedInflections.Next()) + { + const auto& [root, inflectsTo, name] = read.value(); + if (!inflections.emplace_back(Inflection{ + S(root), + QRegularExpression(QRegularExpression::anchoredPattern(S(inflectsTo)), QRegularExpression::UseUnicodePropertiesOption), + S(name) + }).inflectsTo.isValid()) TEXTRACTOR_MESSAGE(L"Invalid regex: %s", StringToWideString(inflectsTo)); + } } void setTerm(QString term) @@ -375,11 +385,17 @@ private: UpdateDictionary(); definitions.clear(); definitionIndex = 0; - std::unordered_set definitionSet; - for (QByteArray utf8term = term.left(500).toUtf8(); !utf8term.isEmpty(); utf8term.chop(1)) - for (auto [it, end] = std::equal_range(dictionary.begin(), dictionary.end(), DictionaryEntry{ utf8term }); it != end; ++it) - if (definitionSet.emplace(it->definition).second) - definitions.push_back(QStringLiteral("

%1 (%3/%4)

%2").arg(utf8term, it->definition)); + std::unordered_set foundDefinitions; + for (term = term.left(500); !term.isEmpty(); term.chop(1)) + for (const auto& [rootTerm, definition, inflections] : LookupDefinitions(term, foundDefinitions)) + definitions.push_back( + QStringLiteral("

%1 (%5/%6)

%2 %3

%4

").arg( + term, + rootTerm.split("<<")[0], + inflections.join(""), + definition + ) + ); for (int i = 0; i < definitions.size(); ++i) definitions[i] = definitions[i].arg(i + 1).arg(definitions.size()); ShowDefinition(); } @@ -403,6 +419,29 @@ private: QString term; private: + struct LookupResult + { + QString term; + QString definition; + QStringList inflectionsUsed; + }; + std::vector LookupDefinitions(QString term, std::unordered_set& foundDefinitions, QStringList inflectionsUsed = {}) + { + std::vector results; + for (auto [it, end] = std::equal_range(dictionary.begin(), dictionary.end(), DictionaryEntry{ term.toUtf8() }); it != end; ++it) + if (foundDefinitions.emplace(it->definition).second) + results.push_back({ term, it->definition, inflectionsUsed }); + for (const auto& inflection : inflections) if (auto match = inflection.inflectsTo.match(term); match.hasMatch()) + { + QStringList currentInflectionsUsed = inflectionsUsed; + currentInflectionsUsed.push_front(inflection.name); + QString root = inflection.root; + for (int i = 0; i < root.size(); ++i) if (root[i].isDigit()) root.replace(i, 1, match.captured(root[i].unicode() - '0')); + for (const auto& definition : LookupDefinitions(root, foundDefinitions, currentInflectionsUsed)) results.push_back(definition); + } + return results; + } + void wheelEvent(QWheelEvent* event) override { int scroll = event->angleDelta().y(); @@ -411,6 +450,14 @@ private: ShowDefinition(); } + struct Inflection + { + QString root; + QRegularExpression inflectsTo; + QString name; + }; + std::vector inflections; + std::filesystem::file_time_type dictionaryFileLastWrite; std::vector charStorage; std::vector definitions; diff --git a/extensions/replacer.cpp b/extensions/replacer.cpp index a67aaec..ededfe9 100644 --- a/extensions/replacer.cpp +++ b/extensions/replacer.cpp @@ -17,10 +17,10 @@ class Trie public: Trie(const std::istream& replacementScript) { - BlockMarkupIterator replacementScriptParser(replacementScript.rdbuf(), Array{ L"|ORIG|", L"|BECOMES|" }); + BlockMarkupIterator replacementScriptParser(replacementScript, Array{ L"|ORIG|", L"|BECOMES|" }); while (auto read = replacementScriptParser.Next()) { - const auto& [original, replacement] = *read; + const auto& [original, replacement] = read.value(); Node* current = &root; for (auto ch : original) if (!Ignore(ch)) current = Next(current, ch); if (current != &root) @@ -103,7 +103,7 @@ BOOL WINAPI DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved if (trie.Empty()) { auto file = std::ofstream(REPLACE_SAVE_FILE, std::ios::binary) << "\xff\xfe"; - for (auto ch : std::wstring_view(REPLACER_INSTRUCTIONS)) file << (ch == L'\n' ? std::string_view("\r\0\n", 4) : std::string_view((char*)&ch, 2)); + for (auto ch : std::wstring_view(REPLACER_INSTRUCTIONS)) file << (ch == L'\n' ? std::string_view("\r\0\n", 4) : std::string_view((char*)&ch, 2)); _spawnlp(_P_DETACH, "notepad", "notepad", REPLACE_SAVE_FILE, NULL); // show file to user } } diff --git a/text.cpp b/text.cpp index 3c16331..c6af136 100644 --- a/text.cpp +++ b/text.cpp @@ -128,10 +128,16 @@ const char* DICTIONARY_INSTRUCTIONS = u8R"(This file is used only for the "Dicti It uses a custom format specific to Textractor and is not meant to be written manually. You should look for a dictionary in this format online (https://github.com/Artikash/Textractor-Dictionaries/releases is a good place to start). Alternatively, if you're a programmer, you can write a script to convert a dictionary from another format with the info below. -Once you have a dictionary, to look up some text in Extra Window, hover over it. All matching definitions will be shown. Scroll to change definitions. -Definitions are formatted like this:|TERM|Hola|TERM|hola|TERM|Bonjour|TERM|bonjour|DEFINITION|hello|END| -The definition can include rich text (https://doc.qt.io/qt-5/richtext-html-subset.html) which will be formatted properly. -All text in this file outside of a definition is ignored. +Once you have a dictionary, to look up some text in Extra Window, hover over it. You can scroll through all the matching definitions. +Definitions are formatted like this:|TERM|Hola<