add check for outdated doc and refactor input text modifications

Added a flag for checking whether the web document structure is outdated.
Refactored the text modifications applied before the input is passed to the translator.
This commit is contained in:
zeheyler 2020-10-20 02:08:59 +03:00
parent 2ab780a491
commit 3af4217075
3 changed files with 147 additions and 110 deletions

View File

@ -71,7 +71,7 @@ bool DevTools::startChrome(QString path, bool headless, int port)
if (!std::filesystem::exists(path.toStdWString())) if (!std::filesystem::exists(path.toStdWString()))
return false; return false;
DWORD exitCode = 0; DWORD exitCode = 0;
if ((GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE) && (exitCode == STILL_ACTIVE)) if (GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE && exitCode == STILL_ACTIVE)
return false; return false;
QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir=" QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir="
+ QString::fromStdWString(std::filesystem::current_path()) + QString::fromStdWString(std::filesystem::current_path())
@ -237,8 +237,8 @@ void DevTools::onTextMessageReceived(QString message)
{ {
for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();) for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();)
{ {
if ((iter->second.value("method") == root.value("method")) if (iter->second.value("method") == root.value("method")
&& (compareJson(iter->second.value("params"), root.value("params")))) && compareJson(iter->second.value("params"), root.value("params")))
{ {
mutex.lock(); mutex.lock();
mapmethod.erase(iter++); mapmethod.erase(iter++);

View File

@ -33,28 +33,48 @@ QStringList languages
}; };
int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1; int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1;
long update = -1;
std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools) std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools)
{ {
QString qtext = S(text); QString qtext = S(text);
qtext.remove(QString(12288)); // japanese space (no need for translator)
// Check text for repeated symbols (e.g. only ellipsis) // Check quotes
if (qtext.length() > 2) bool checkquote = false;
for (int i = 1; i < (qtext.length() - 1); i++) if ((qtext.front() == QString(12300) && qtext.back() == QString(12301)) // japanese quotation marks
|| (qtext.front() == "\"" && qtext.back() == "\""))
{ {
if (qtext[i] != qtext[1]) checkquote = true;
break; qtext.remove(0, 1);
if ((i + 2) == qtext.length() && (qtext.front() == qtext.back())) qtext.chop(1);
}
if (qtext == QString(12387)) // if text consists of only one sokuon, add exclamation mark for correct translation
{
qtext += "!";
}
// Check ellipsis
int count = qtext.count(QString(8230)); // ellipsis
if (count == qtext.length()
|| (count == (qtext.length() - 1) && qtext.back() == QString(12290))) // japanese end of a sentence
{ {
return { true, text }; return { true, text };
} }
// Put quotes back
if (checkquote)
{
qtext = "\"" + qtext + "\"";
} }
// Check status
if (devtools->getStatus() == "Stopped") if (devtools->getStatus() == "Stopped")
{ {
return { false, FormatString(L"%s", ERROR_CHROME) }; return { false, FormatString(L"%s", ERROR_CHROME) };
} }
if ((devtools->getStatus().startsWith("Fail")) || (devtools->getStatus().startsWith("Unconnected"))) if (devtools->getStatus().startsWith("Fail") || devtools->getStatus().startsWith("Unconnected"))
{ {
return { false, FormatString(L"%s", ERROR_START_CHROME) }; return { false, FormatString(L"%s", ERROR_START_CHROME) };
} }
@ -65,22 +85,15 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
targetNodeId = -1; targetNodeId = -1;
pageenabled = -1; pageenabled = -1;
useragentflag = -1; useragentflag = -1;
update = -1;
} }
// Add spaces near ellipsis for better translation and check for quotes // Erase tags and reduce the number of ellipsis for better translation
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230)); qtext.remove(QRegExp("<[^>]*>"));
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230)); qtext.replace(QRegExp("(" + QString(8230) + ")+"), " " + QString(8230));
qtext.replace(QRegularExpression("[" + QString(8230) + "]"), " " + QString(8230) + " ");
bool checkquote = false;
if ((qtext.front() == QString(12300)) && (qtext.back() == QString(12301)))
{
checkquote = true;
qtext.remove(0, 1);
qtext.chop(1);
}
QJsonObject root;
// Enable page feedback // Enable page feedback
QJsonObject root;
if (pageenabled == -1) if (pageenabled == -1)
{ {
if (!devtools->SendRequest("Page.enable", {}, root)) if (!devtools->SendRequest("Page.enable", {}, root))
@ -94,9 +107,9 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
if (useragentflag == -1) if (useragentflag == -1)
{ {
QString useragent = devtools->getUserAgent(); QString useragent = devtools->getUserAgent();
useragent.replace(QRegularExpression("HeadlessChrome"), "Chrome");
if (!useragent.isEmpty()) if (!useragent.isEmpty())
{ {
useragent.replace("HeadlessChrome", "Chrome");
if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root)) if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root))
{ {
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) }; return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
@ -107,11 +120,18 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
long navigate = devtools->methodToReceive("Page.navigatedWithinDocument"); long navigate = devtools->methodToReceive("Page.navigatedWithinDocument");
long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } }); long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } });
if (update == -1)
{
update = devtools->methodToReceive("DOM.documentUpdated");
}
// Navigate to site // Navigate to site
QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext; QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext;
if (devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root)) if (!devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root))
{ {
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
// Wait until page is loaded // Wait until page is loaded
float timer = 0; float timer = 0;
int timer_stop = 10; int timer_stop = 10;
@ -125,22 +145,29 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) }; return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
} }
// Check if document is outdated
if (devtools->checkMethod(update))
{
docfound = -1;
targetNodeId = -1;
update = -1;
}
// Get document // Get document
if (docfound == -1) if (docfound == -1)
{ {
if (!devtools->SendRequest("DOM.getDocument", {}, root)) if (!devtools->SendRequest("DOM.getDocument", {}, root))
{ {
docfound = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) }; return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
} }
docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt(); docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
} }
//Get target selector // Get target selector
if (targetNodeId == -1) if (targetNodeId == -1)
{ {
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root)) if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root)
|| (root.value("result").toObject().value("nodeId").toInt() == 0)) || root.value("result").toObject().value("nodeId").toInt() == 0)
{ {
docfound = -1; docfound = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) }; return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
@ -157,14 +184,19 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
} }
// Catch the translation // Catch the translation
devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root); if (!devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root))
{
docfound = -1;
targetNodeId = -1;
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString(); QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
if (OuterHTML == "<div></div>") if (OuterHTML == "<div></div>")
{ {
// Try to catch the notification // Try to catch the notification
int noteNodeId = -1; int noteNodeId = -1;
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root)) if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root)
|| (root.value("result").toObject().value("nodeId").toInt() == 0)) || root.value("result").toObject().value("nodeId").toInt() == 0)
{ {
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) }; return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
} }
@ -200,15 +232,5 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
} }
} }
// Get quotes back
if (checkquote)
{
OuterHTML = "\"" + OuterHTML + "\"";
}
return { true, S(OuterHTML) }; return { true, S(OuterHTML) };
}
else
{
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
}
} }

View File

@ -50,6 +50,17 @@ void SaveCache()
savedSize = translationCache->size(); savedSize = translationCache->size();
} }
/// Removes control characters from `text` in place.
///
/// The characters dropped are line feed, carriage return, horizontal tab and
/// the code points U+0004 and U+0005. All other characters keep their
/// relative order.
///
/// @param text String to clean; modified in place. An empty string is left untouched.
void EraseControlCharacters(std::wstring& text)
{
	// Compact the kept characters towards the front and truncate once at the end.
	// This avoids erasing while iterating: the previous `text.erase(it--)` stepped
	// the iterator before begin() whenever the first character was removed and
	// kept using an iterator after erase() had invalidated it.
	std::wstring::size_type kept = 0;
	for (std::wstring::size_type i = 0; i < text.size(); ++i)
	{
		const wchar_t ch = text[i];
		const bool isControl = (ch == L'\n') || (ch == L'\r') || (ch == L'\t') || (int(ch) == 4) || (int(ch) == 5);
		if (!isControl)
		{
			text[kept++] = ch;
		}
	}
	text.resize(kept);
}
class Window : public QDialog class Window : public QDialog
{ {
public: public:
@ -184,7 +195,11 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b"; if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b";
} }
if (translation.empty() && (sentenceInfo["current select"])) if (translation.empty() && (sentenceInfo["current select"]))
{
EraseControlCharacters(sentence);
std::tie(cache, translation) = Translate(sentence, devtools); std::tie(cache, translation) = Translate(sentence, devtools);
}
if (cache) translationCache->try_emplace(sentence, translation); if (cache) translationCache->try_emplace(sentence, translation);
if (cache && translationCache->size() > savedSize + 50) SaveCache(); if (cache && translationCache->size() > savedSize + 50) SaveCache();