Add check for outdated document and refactor input text modifications

Added a flag for checking whether the web document structure is outdated.
Refactored the text modifications that are applied before the text is passed to the translator.
This commit is contained in:
zeheyler 2020-10-20 02:08:59 +03:00
parent 2ab780a491
commit 3af4217075
3 changed files with 147 additions and 110 deletions

View File

@ -71,7 +71,7 @@ bool DevTools::startChrome(QString path, bool headless, int port)
if (!std::filesystem::exists(path.toStdWString())) if (!std::filesystem::exists(path.toStdWString()))
return false; return false;
DWORD exitCode = 0; DWORD exitCode = 0;
if ((GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE) && (exitCode == STILL_ACTIVE)) if (GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE && exitCode == STILL_ACTIVE)
return false; return false;
QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir=" QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir="
+ QString::fromStdWString(std::filesystem::current_path()) + QString::fromStdWString(std::filesystem::current_path())
@ -237,8 +237,8 @@ void DevTools::onTextMessageReceived(QString message)
{ {
for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();) for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();)
{ {
if ((iter->second.value("method") == root.value("method")) if (iter->second.value("method") == root.value("method")
&& (compareJson(iter->second.value("params"), root.value("params")))) && compareJson(iter->second.value("params"), root.value("params")))
{ {
mutex.lock(); mutex.lock();
mapmethod.erase(iter++); mapmethod.erase(iter++);

View File

@ -33,28 +33,48 @@ QStringList languages
}; };
int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1; int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1;
long update = -1;
std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools) std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools)
{ {
QString qtext = S(text); QString qtext = S(text);
qtext.remove(QString(12288)); // japanese space (no need for translator)
// Check text for repeated symbols (e.g. only ellipsis) // Check quotes
if (qtext.length() > 2) bool checkquote = false;
for (int i = 1; i < (qtext.length() - 1); i++) if ((qtext.front() == QString(12300) && qtext.back() == QString(12301)) // japanese quotation marks
{ || (qtext.front() == "\"" && qtext.back() == "\""))
if (qtext[i] != qtext[1]) {
break; checkquote = true;
if ((i + 2) == qtext.length() && (qtext.front() == qtext.back())) qtext.remove(0, 1);
{ qtext.chop(1);
return { true, text }; }
}
}
if (qtext == QString(12387)) // if text consists of only one sokuon, add exclamation mark for correct translation
{
qtext += "!";
}
// Check ellipsis
int count = qtext.count(QString(8230)); // ellipsis
if (count == qtext.length()
|| (count == (qtext.length() - 1) && qtext.back() == QString(12290))) // japanese end of a sentence
{
return { true, text };
}
// Put quotes back
if (checkquote)
{
qtext = "\"" + qtext + "\"";
}
// Check status
if (devtools->getStatus() == "Stopped") if (devtools->getStatus() == "Stopped")
{ {
return { false, FormatString(L"%s", ERROR_CHROME) }; return { false, FormatString(L"%s", ERROR_CHROME) };
} }
if ((devtools->getStatus().startsWith("Fail")) || (devtools->getStatus().startsWith("Unconnected"))) if (devtools->getStatus().startsWith("Fail") || devtools->getStatus().startsWith("Unconnected"))
{ {
return { false, FormatString(L"%s", ERROR_START_CHROME) }; return { false, FormatString(L"%s", ERROR_START_CHROME) };
} }
@ -65,22 +85,15 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
targetNodeId = -1; targetNodeId = -1;
pageenabled = -1; pageenabled = -1;
useragentflag = -1; useragentflag = -1;
update = -1;
} }
// Add spaces near ellipsis for better translation and check for quotes // Erase tags and reduce the number of ellipsis for better translation
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230)); qtext.remove(QRegExp("<[^>]*>"));
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230)); qtext.replace(QRegExp("(" + QString(8230) + ")+"), " " + QString(8230));
qtext.replace(QRegularExpression("[" + QString(8230) + "]"), " " + QString(8230) + " ");
bool checkquote = false;
if ((qtext.front() == QString(12300)) && (qtext.back() == QString(12301)))
{
checkquote = true;
qtext.remove(0, 1);
qtext.chop(1);
}
QJsonObject root;
// Enable page feedback // Enable page feedback
QJsonObject root;
if (pageenabled == -1) if (pageenabled == -1)
{ {
if (!devtools->SendRequest("Page.enable", {}, root)) if (!devtools->SendRequest("Page.enable", {}, root))
@ -94,9 +107,9 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
if (useragentflag == -1) if (useragentflag == -1)
{ {
QString useragent = devtools->getUserAgent(); QString useragent = devtools->getUserAgent();
useragent.replace(QRegularExpression("HeadlessChrome"), "Chrome");
if (!useragent.isEmpty()) if (!useragent.isEmpty())
{ {
useragent.replace("HeadlessChrome", "Chrome");
if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root)) if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root))
{ {
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) }; return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
@ -107,108 +120,117 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
long navigate = devtools->methodToReceive("Page.navigatedWithinDocument"); long navigate = devtools->methodToReceive("Page.navigatedWithinDocument");
long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } }); long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } });
if (update == -1)
{
update = devtools->methodToReceive("DOM.documentUpdated");
}
// Navigate to site // Navigate to site
QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext; QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext;
if (devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root)) if (!devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root))
{ {
// Wait until page is loaded return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
float timer = 0; }
int timer_stop = 10;
while (!devtools->checkMethod(navigate) && timer < timer_stop) // Wait until page is loaded
float timer = 0;
int timer_stop = 10;
while (!devtools->checkMethod(navigate) && timer < timer_stop)
{
std::this_thread::sleep_for(std::chrono::milliseconds(100));
timer += 0.1;
}
if (timer >= timer_stop)
{
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
}
// Check if document is outdated
if (devtools->checkMethod(update))
{
docfound = -1;
targetNodeId = -1;
update = -1;
}
// Get document
if (docfound == -1)
{
if (!devtools->SendRequest("DOM.getDocument", {}, root))
{ {
std::this_thread::sleep_for(std::chrono::milliseconds(100)); return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
timer += 0.1;
} }
if (timer >= timer_stop) docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
}
// Get target selector
if (targetNodeId == -1)
{
if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root)
|| root.value("result").toObject().value("nodeId").toInt() == 0)
{
docfound = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
targetNodeId = root.value("result").toObject().value("nodeId").toInt();
}
// Wait for translation to appear on the web page
timer = 0;
while (!devtools->checkMethod(target) && timer < timer_stop)
{
std::this_thread::sleep_for(std::chrono::milliseconds(100));
timer += 0.1;
}
// Catch the translation
if (!devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root))
{
docfound = -1;
targetNodeId = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
if (OuterHTML == "<div></div>")
{
// Try to catch the notification
int noteNodeId = -1;
if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root)
|| root.value("result").toObject().value("nodeId").toInt() == 0)
{ {
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) }; return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
} }
noteNodeId = root.value("result").toObject().value("nodeId").toInt();
// Get document if (devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", noteNodeId} }, root))
if (docfound == -1)
{ {
if (!devtools->SendRequest("DOM.getDocument", {}, root)) OuterHTML = root.value("result").toObject().value("outerHTML").toString();
{
docfound = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
}
//Get target selector
if (targetNodeId == -1)
{
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root))
|| (root.value("result").toObject().value("nodeId").toInt() == 0))
{
docfound = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
targetNodeId = root.value("result").toObject().value("nodeId").toInt();
}
// Wait for translation to appear on the web page
timer = 0;
while (!devtools->checkMethod(target) && timer < timer_stop)
{
std::this_thread::sleep_for(std::chrono::milliseconds(100));
timer += 0.1;
}
// Catch the translation
devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root);
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
if (OuterHTML == "<div></div>")
{
// Try to catch the notification
int noteNodeId = -1;
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root))
|| (root.value("result").toObject().value("nodeId").toInt() == 0))
{
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
}
noteNodeId = root.value("result").toObject().value("nodeId").toInt();
if (devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", noteNodeId} }, root))
{
OuterHTML = root.value("result").toObject().value("outerHTML").toString();
}
OuterHTML.remove(QRegExp("<[^>]*>"));
OuterHTML = OuterHTML.trimmed();
return { false, FormatString(L"%s: %s", ERROR_NOTE, S(OuterHTML)) };
} }
OuterHTML.remove(QRegExp("<[^>]*>")); OuterHTML.remove(QRegExp("<[^>]*>"));
OuterHTML = OuterHTML.trimmed(); OuterHTML = OuterHTML.trimmed();
// Check if the translator output language does not match the selected language return { false, FormatString(L"%s: %s", ERROR_NOTE, S(OuterHTML)) };
if (devtools->SendRequest("DOM.getAttributes", { {"nodeId", targetNodeId} }, root)) }
OuterHTML.remove(QRegExp("<[^>]*>"));
OuterHTML = OuterHTML.trimmed();
// Check if the translator output language does not match the selected language
if (devtools->SendRequest("DOM.getAttributes", { {"nodeId", targetNodeId} }, root))
{
QJsonObject result = root.value("result").toObject();
QJsonArray attributes = result.value("attributes").toArray();
for (size_t i = 0; i < attributes.size(); i++)
{ {
QJsonObject result = root.value("result").toObject(); if (attributes[i].toString() == "lang")
QJsonArray attributes = result.value("attributes").toArray();
for (size_t i = 0; i < attributes.size(); i++)
{ {
if (attributes[i].toString() == "lang") QString targetlang = attributes[i + 1].toString().mid(0, 2);
if (targetlang != S(translateTo.Copy()))
{ {
QString targetlang = attributes[i + 1].toString().mid(0, 2); return { false, FormatString(L"%s (%s): %s", ERROR_LANGUAGE, S(targetlang), S(OuterHTML)) };
if (targetlang != S(translateTo.Copy()))
{
return { false, FormatString(L"%s (%s): %s", ERROR_LANGUAGE, S(targetlang), S(OuterHTML)) };
}
} }
} }
} }
}
// Get quotes back return { true, S(OuterHTML) };
if (checkquote)
{
OuterHTML = "\"" + OuterHTML + "\"";
}
return { true, S(OuterHTML) };
}
else
{
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
} }

View File

@ -50,6 +50,17 @@ void SaveCache()
savedSize = translationCache->size(); savedSize = translationCache->size();
} }
// Removes control characters from `text` in place so they are not passed on
// to the translator.
//
// The characters removed are: line feed, carriage return, horizontal tab and
// the code points 4 and 5. All other characters are kept in their original
// order.
//
// @param text  String to clean; modified in place. An empty string is left
//              unchanged.
void EraseControlCharacters(std::wstring& text)
{
	// Compact the kept characters towards the front and truncate once at the
	// end. This avoids erasing while iterating: the previous `erase(it--)`
	// form decremented begin() when the first character was a control
	// character and kept using an iterator that erase() may invalidate.
	std::wstring::size_type kept = 0;
	for (std::wstring::size_type i = 0; i < text.size(); ++i)
	{
		const wchar_t ch = text[i];
		const bool isControl = (ch == L'\n') || (ch == L'\r') || (ch == L'\t')
			|| (int(ch) == 4) || (int(ch) == 5);
		if (!isControl)
		{
			// kept <= i always holds, so this never overwrites an unread character.
			text[kept++] = ch;
		}
	}
	text.resize(kept);
}
class Window : public QDialog class Window : public QDialog
{ {
public: public:
@ -184,7 +195,11 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b"; if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b";
} }
if (translation.empty() && (sentenceInfo["current select"])) if (translation.empty() && (sentenceInfo["current select"]))
{
EraseControlCharacters(sentence);
std::tie(cache, translation) = Translate(sentence, devtools); std::tie(cache, translation) = Translate(sentence, devtools);
}
if (cache) translationCache->try_emplace(sentence, translation); if (cache) translationCache->try_emplace(sentence, translation);
if (cache && translationCache->size() > savedSize + 50) SaveCache(); if (cache && translationCache->size() > savedSize + 50) SaveCache();