add check for outdated doc and refactor input text modifications
Added a flag for checking whether the web document structure is outdated. Refactored the text modifications applied before the text is passed to the translator.
This commit is contained in:
parent
2ab780a491
commit
3af4217075
@ -71,7 +71,7 @@ bool DevTools::startChrome(QString path, bool headless, int port)
|
|||||||
if (!std::filesystem::exists(path.toStdWString()))
|
if (!std::filesystem::exists(path.toStdWString()))
|
||||||
return false;
|
return false;
|
||||||
DWORD exitCode = 0;
|
DWORD exitCode = 0;
|
||||||
if ((GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE) && (exitCode == STILL_ACTIVE))
|
if (GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE && exitCode == STILL_ACTIVE)
|
||||||
return false;
|
return false;
|
||||||
QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir="
|
QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir="
|
||||||
+ QString::fromStdWString(std::filesystem::current_path())
|
+ QString::fromStdWString(std::filesystem::current_path())
|
||||||
@ -237,8 +237,8 @@ void DevTools::onTextMessageReceived(QString message)
|
|||||||
{
|
{
|
||||||
for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();)
|
for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();)
|
||||||
{
|
{
|
||||||
if ((iter->second.value("method") == root.value("method"))
|
if (iter->second.value("method") == root.value("method")
|
||||||
&& (compareJson(iter->second.value("params"), root.value("params"))))
|
&& compareJson(iter->second.value("params"), root.value("params")))
|
||||||
{
|
{
|
||||||
mutex.lock();
|
mutex.lock();
|
||||||
mapmethod.erase(iter++);
|
mapmethod.erase(iter++);
|
||||||
|
@ -33,28 +33,48 @@ QStringList languages
|
|||||||
};
|
};
|
||||||
|
|
||||||
int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1;
|
int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1;
|
||||||
|
long update = -1;
|
||||||
|
|
||||||
std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools)
|
std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools)
|
||||||
{
|
{
|
||||||
QString qtext = S(text);
|
QString qtext = S(text);
|
||||||
|
qtext.remove(QString(12288)); // japanese space (no need for translator)
|
||||||
|
|
||||||
// Check text for repeated symbols (e.g. only ellipsis)
|
// Check quotes
|
||||||
if (qtext.length() > 2)
|
bool checkquote = false;
|
||||||
for (int i = 1; i < (qtext.length() - 1); i++)
|
if ((qtext.front() == QString(12300) && qtext.back() == QString(12301)) // japanese quotation marks
|
||||||
{
|
|| (qtext.front() == "\"" && qtext.back() == "\""))
|
||||||
if (qtext[i] != qtext[1])
|
{
|
||||||
break;
|
checkquote = true;
|
||||||
if ((i + 2) == qtext.length() && (qtext.front() == qtext.back()))
|
qtext.remove(0, 1);
|
||||||
{
|
qtext.chop(1);
|
||||||
return { true, text };
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (qtext == QString(12387)) // if text consists of only one sokuon, add exclamation mark for correct translation
|
||||||
|
{
|
||||||
|
qtext += "!";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check ellipsis
|
||||||
|
int count = qtext.count(QString(8230)); // ellipsis
|
||||||
|
if (count == qtext.length()
|
||||||
|
|| (count == (qtext.length() - 1) && qtext.back() == QString(12290))) // japanese end of a sentence
|
||||||
|
{
|
||||||
|
return { true, text };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put quotes back
|
||||||
|
if (checkquote)
|
||||||
|
{
|
||||||
|
qtext = "\"" + qtext + "\"";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check status
|
||||||
if (devtools->getStatus() == "Stopped")
|
if (devtools->getStatus() == "Stopped")
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s", ERROR_CHROME) };
|
return { false, FormatString(L"%s", ERROR_CHROME) };
|
||||||
}
|
}
|
||||||
if ((devtools->getStatus().startsWith("Fail")) || (devtools->getStatus().startsWith("Unconnected")))
|
if (devtools->getStatus().startsWith("Fail") || devtools->getStatus().startsWith("Unconnected"))
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s", ERROR_START_CHROME) };
|
return { false, FormatString(L"%s", ERROR_START_CHROME) };
|
||||||
}
|
}
|
||||||
@ -65,22 +85,15 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
targetNodeId = -1;
|
targetNodeId = -1;
|
||||||
pageenabled = -1;
|
pageenabled = -1;
|
||||||
useragentflag = -1;
|
useragentflag = -1;
|
||||||
|
update = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add spaces near ellipsis for better translation and check for quotes
|
// Erase tags and reduce the number of ellipsis for better translation
|
||||||
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230));
|
qtext.remove(QRegExp("<[^>]*>"));
|
||||||
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230));
|
qtext.replace(QRegExp("(" + QString(8230) + ")+"), " " + QString(8230));
|
||||||
qtext.replace(QRegularExpression("[" + QString(8230) + "]"), " " + QString(8230) + " ");
|
|
||||||
bool checkquote = false;
|
|
||||||
if ((qtext.front() == QString(12300)) && (qtext.back() == QString(12301)))
|
|
||||||
{
|
|
||||||
checkquote = true;
|
|
||||||
qtext.remove(0, 1);
|
|
||||||
qtext.chop(1);
|
|
||||||
}
|
|
||||||
QJsonObject root;
|
|
||||||
|
|
||||||
// Enable page feedback
|
// Enable page feedback
|
||||||
|
QJsonObject root;
|
||||||
if (pageenabled == -1)
|
if (pageenabled == -1)
|
||||||
{
|
{
|
||||||
if (!devtools->SendRequest("Page.enable", {}, root))
|
if (!devtools->SendRequest("Page.enable", {}, root))
|
||||||
@ -94,9 +107,9 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
if (useragentflag == -1)
|
if (useragentflag == -1)
|
||||||
{
|
{
|
||||||
QString useragent = devtools->getUserAgent();
|
QString useragent = devtools->getUserAgent();
|
||||||
useragent.replace(QRegularExpression("HeadlessChrome"), "Chrome");
|
|
||||||
if (!useragent.isEmpty())
|
if (!useragent.isEmpty())
|
||||||
{
|
{
|
||||||
|
useragent.replace("HeadlessChrome", "Chrome");
|
||||||
if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root))
|
if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root))
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
@ -107,108 +120,117 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
|
|
||||||
long navigate = devtools->methodToReceive("Page.navigatedWithinDocument");
|
long navigate = devtools->methodToReceive("Page.navigatedWithinDocument");
|
||||||
long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } });
|
long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } });
|
||||||
|
if (update == -1)
|
||||||
|
{
|
||||||
|
update = devtools->methodToReceive("DOM.documentUpdated");
|
||||||
|
}
|
||||||
|
|
||||||
// Navigate to site
|
// Navigate to site
|
||||||
QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext;
|
QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext;
|
||||||
if (devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root))
|
if (!devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root))
|
||||||
{
|
{
|
||||||
// Wait until page is loaded
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
float timer = 0;
|
}
|
||||||
int timer_stop = 10;
|
|
||||||
while (!devtools->checkMethod(navigate) && timer < timer_stop)
|
// Wait until page is loaded
|
||||||
|
float timer = 0;
|
||||||
|
int timer_stop = 10;
|
||||||
|
while (!devtools->checkMethod(navigate) && timer < timer_stop)
|
||||||
|
{
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||||
|
timer += 0.1;
|
||||||
|
}
|
||||||
|
if (timer >= timer_stop)
|
||||||
|
{
|
||||||
|
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if document is outdated
|
||||||
|
if (devtools->checkMethod(update))
|
||||||
|
{
|
||||||
|
docfound = -1;
|
||||||
|
targetNodeId = -1;
|
||||||
|
update = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get document
|
||||||
|
if (docfound == -1)
|
||||||
|
{
|
||||||
|
if (!devtools->SendRequest("DOM.getDocument", {}, root))
|
||||||
{
|
{
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
timer += 0.1;
|
|
||||||
}
|
}
|
||||||
if (timer >= timer_stop)
|
docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get target selector
|
||||||
|
if (targetNodeId == -1)
|
||||||
|
{
|
||||||
|
if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root)
|
||||||
|
|| root.value("result").toObject().value("nodeId").toInt() == 0)
|
||||||
|
{
|
||||||
|
docfound = -1;
|
||||||
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
|
}
|
||||||
|
targetNodeId = root.value("result").toObject().value("nodeId").toInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for translation to appear on the web page
|
||||||
|
timer = 0;
|
||||||
|
while (!devtools->checkMethod(target) && timer < timer_stop)
|
||||||
|
{
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||||
|
timer += 0.1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Catch the translation
|
||||||
|
if (!devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root))
|
||||||
|
{
|
||||||
|
docfound = -1;
|
||||||
|
targetNodeId = -1;
|
||||||
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
|
}
|
||||||
|
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
|
||||||
|
if (OuterHTML == "<div></div>")
|
||||||
|
{
|
||||||
|
// Try to catch the notification
|
||||||
|
int noteNodeId = -1;
|
||||||
|
if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root)
|
||||||
|
|| root.value("result").toObject().value("nodeId").toInt() == 0)
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
||||||
}
|
}
|
||||||
|
noteNodeId = root.value("result").toObject().value("nodeId").toInt();
|
||||||
|
|
||||||
// Get document
|
if (devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", noteNodeId} }, root))
|
||||||
if (docfound == -1)
|
|
||||||
{
|
{
|
||||||
if (!devtools->SendRequest("DOM.getDocument", {}, root))
|
OuterHTML = root.value("result").toObject().value("outerHTML").toString();
|
||||||
{
|
|
||||||
docfound = -1;
|
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
|
||||||
}
|
|
||||||
docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
//Get target selector
|
|
||||||
if (targetNodeId == -1)
|
|
||||||
{
|
|
||||||
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root))
|
|
||||||
|| (root.value("result").toObject().value("nodeId").toInt() == 0))
|
|
||||||
{
|
|
||||||
docfound = -1;
|
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
|
||||||
}
|
|
||||||
targetNodeId = root.value("result").toObject().value("nodeId").toInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for translation to appear on the web page
|
|
||||||
timer = 0;
|
|
||||||
while (!devtools->checkMethod(target) && timer < timer_stop)
|
|
||||||
{
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
|
||||||
timer += 0.1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Catch the translation
|
|
||||||
devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root);
|
|
||||||
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
|
|
||||||
if (OuterHTML == "<div></div>")
|
|
||||||
{
|
|
||||||
// Try to catch the notification
|
|
||||||
int noteNodeId = -1;
|
|
||||||
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root))
|
|
||||||
|| (root.value("result").toObject().value("nodeId").toInt() == 0))
|
|
||||||
{
|
|
||||||
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
|
||||||
}
|
|
||||||
noteNodeId = root.value("result").toObject().value("nodeId").toInt();
|
|
||||||
|
|
||||||
if (devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", noteNodeId} }, root))
|
|
||||||
{
|
|
||||||
OuterHTML = root.value("result").toObject().value("outerHTML").toString();
|
|
||||||
}
|
|
||||||
OuterHTML.remove(QRegExp("<[^>]*>"));
|
|
||||||
OuterHTML = OuterHTML.trimmed();
|
|
||||||
|
|
||||||
return { false, FormatString(L"%s: %s", ERROR_NOTE, S(OuterHTML)) };
|
|
||||||
}
|
}
|
||||||
OuterHTML.remove(QRegExp("<[^>]*>"));
|
OuterHTML.remove(QRegExp("<[^>]*>"));
|
||||||
OuterHTML = OuterHTML.trimmed();
|
OuterHTML = OuterHTML.trimmed();
|
||||||
|
|
||||||
// Check if the translator output language does not match the selected language
|
return { false, FormatString(L"%s: %s", ERROR_NOTE, S(OuterHTML)) };
|
||||||
if (devtools->SendRequest("DOM.getAttributes", { {"nodeId", targetNodeId} }, root))
|
}
|
||||||
|
OuterHTML.remove(QRegExp("<[^>]*>"));
|
||||||
|
OuterHTML = OuterHTML.trimmed();
|
||||||
|
|
||||||
|
// Check if the translator output language does not match the selected language
|
||||||
|
if (devtools->SendRequest("DOM.getAttributes", { {"nodeId", targetNodeId} }, root))
|
||||||
|
{
|
||||||
|
QJsonObject result = root.value("result").toObject();
|
||||||
|
QJsonArray attributes = result.value("attributes").toArray();
|
||||||
|
for (size_t i = 0; i < attributes.size(); i++)
|
||||||
{
|
{
|
||||||
QJsonObject result = root.value("result").toObject();
|
if (attributes[i].toString() == "lang")
|
||||||
QJsonArray attributes = result.value("attributes").toArray();
|
|
||||||
for (size_t i = 0; i < attributes.size(); i++)
|
|
||||||
{
|
{
|
||||||
if (attributes[i].toString() == "lang")
|
QString targetlang = attributes[i + 1].toString().mid(0, 2);
|
||||||
|
if (targetlang != S(translateTo.Copy()))
|
||||||
{
|
{
|
||||||
QString targetlang = attributes[i + 1].toString().mid(0, 2);
|
return { false, FormatString(L"%s (%s): %s", ERROR_LANGUAGE, S(targetlang), S(OuterHTML)) };
|
||||||
if (targetlang != S(translateTo.Copy()))
|
|
||||||
{
|
|
||||||
return { false, FormatString(L"%s (%s): %s", ERROR_LANGUAGE, S(targetlang), S(OuterHTML)) };
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get quotes back
|
return { true, S(OuterHTML) };
|
||||||
if (checkquote)
|
|
||||||
{
|
|
||||||
OuterHTML = "\"" + OuterHTML + "\"";
|
|
||||||
}
|
|
||||||
return { true, S(OuterHTML) };
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
|
||||||
}
|
|
||||||
}
|
}
|
@ -50,6 +50,17 @@ void SaveCache()
|
|||||||
savedSize = translationCache->size();
|
savedSize = translationCache->size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes control characters from `text` in place.
 *
 * The characters dropped are line feed, carriage return, horizontal tab and
 * the code points 4 and 5. All other characters keep their relative order.
 *
 * @param text String to clean; modified in place.
 */
void EraseControlCharacters(std::wstring& text)
{
	// Compact the kept characters towards the front and truncate once at the end.
	// This avoids erasing while iterating: the previous `text.erase(it--)` form
	// decremented begin() when the first character was a control character and
	// continued to use an iterator that erase() had invalidated.
	std::wstring::size_type kept = 0;
	for (std::wstring::size_type i = 0; i < text.size(); ++i)
	{
		const wchar_t ch = text[i];
		const bool isControl = (ch == L'\n') || (ch == L'\r') || (ch == L'\t') || (int(ch) == 4) || (int(ch) == 5);
		if (!isControl)
		{
			text[kept++] = ch;
		}
	}
	text.resize(kept);
}
|
||||||
|
|
||||||
class Window : public QDialog
|
class Window : public QDialog
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -184,7 +195,11 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
|
|||||||
if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b";
|
if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b";
|
||||||
}
|
}
|
||||||
if (translation.empty() && (sentenceInfo["current select"]))
|
if (translation.empty() && (sentenceInfo["current select"]))
|
||||||
|
{
|
||||||
|
EraseControlCharacters(sentence);
|
||||||
std::tie(cache, translation) = Translate(sentence, devtools);
|
std::tie(cache, translation) = Translate(sentence, devtools);
|
||||||
|
}
|
||||||
|
|
||||||
if (cache) translationCache->try_emplace(sentence, translation);
|
if (cache) translationCache->try_emplace(sentence, translation);
|
||||||
if (cache && translationCache->size() > savedSize + 50) SaveCache();
|
if (cache && translationCache->size() > savedSize + 50) SaveCache();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user