mirror of
https://github.com/Artikash/Textractor.git
synced 2024-12-23 17:04:12 +08:00
add check for outdated doc and refactor input text modifications
Added a flag for checking whether the web document structure is outdated. Refactored the text modifications that are applied before the text is passed to the translator.
This commit is contained in:
parent
2ab780a491
commit
3af4217075
@ -71,7 +71,7 @@ bool DevTools::startChrome(QString path, bool headless, int port)
|
|||||||
if (!std::filesystem::exists(path.toStdWString()))
|
if (!std::filesystem::exists(path.toStdWString()))
|
||||||
return false;
|
return false;
|
||||||
DWORD exitCode = 0;
|
DWORD exitCode = 0;
|
||||||
if ((GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE) && (exitCode == STILL_ACTIVE))
|
if (GetExitCodeProcess(processInfo.hProcess, &exitCode) != FALSE && exitCode == STILL_ACTIVE)
|
||||||
return false;
|
return false;
|
||||||
QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir="
|
QString args = "--proxy-server=direct:// --disable-extensions --disable-gpu --user-data-dir="
|
||||||
+ QString::fromStdWString(std::filesystem::current_path())
|
+ QString::fromStdWString(std::filesystem::current_path())
|
||||||
@ -237,8 +237,8 @@ void DevTools::onTextMessageReceived(QString message)
|
|||||||
{
|
{
|
||||||
for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();)
|
for (auto iter = mapmethod.cbegin(); iter != mapmethod.cend();)
|
||||||
{
|
{
|
||||||
if ((iter->second.value("method") == root.value("method"))
|
if (iter->second.value("method") == root.value("method")
|
||||||
&& (compareJson(iter->second.value("params"), root.value("params"))))
|
&& compareJson(iter->second.value("params"), root.value("params")))
|
||||||
{
|
{
|
||||||
mutex.lock();
|
mutex.lock();
|
||||||
mapmethod.erase(iter++);
|
mapmethod.erase(iter++);
|
||||||
|
@ -33,28 +33,48 @@ QStringList languages
|
|||||||
};
|
};
|
||||||
|
|
||||||
int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1;
|
int docfound = -1, targetNodeId = -1, session = -1, pageenabled = -1, useragentflag = -1;
|
||||||
|
long update = -1;
|
||||||
|
|
||||||
std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools)
|
std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devtools)
|
||||||
{
|
{
|
||||||
QString qtext = S(text);
|
QString qtext = S(text);
|
||||||
|
qtext.remove(QString(12288)); // japanese space (no need for translator)
|
||||||
|
|
||||||
// Check text for repeated symbols (e.g. only ellipsis)
|
// Check quotes
|
||||||
if (qtext.length() > 2)
|
bool checkquote = false;
|
||||||
for (int i = 1; i < (qtext.length() - 1); i++)
|
if ((qtext.front() == QString(12300) && qtext.back() == QString(12301)) // japanese quotation marks
|
||||||
|
|| (qtext.front() == "\"" && qtext.back() == "\""))
|
||||||
{
|
{
|
||||||
if (qtext[i] != qtext[1])
|
checkquote = true;
|
||||||
break;
|
qtext.remove(0, 1);
|
||||||
if ((i + 2) == qtext.length() && (qtext.front() == qtext.back()))
|
qtext.chop(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (qtext == QString(12387)) // if text consists of only one sokuon, add exclamation mark for correct translation
|
||||||
|
{
|
||||||
|
qtext += "!";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check ellipsis
|
||||||
|
int count = qtext.count(QString(8230)); // ellipsis
|
||||||
|
if (count == qtext.length()
|
||||||
|
|| (count == (qtext.length() - 1) && qtext.back() == QString(12290))) // japanese end of a sentence
|
||||||
{
|
{
|
||||||
return { true, text };
|
return { true, text };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Put quotes back
|
||||||
|
if (checkquote)
|
||||||
|
{
|
||||||
|
qtext = "\"" + qtext + "\"";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check status
|
||||||
if (devtools->getStatus() == "Stopped")
|
if (devtools->getStatus() == "Stopped")
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s", ERROR_CHROME) };
|
return { false, FormatString(L"%s", ERROR_CHROME) };
|
||||||
}
|
}
|
||||||
if ((devtools->getStatus().startsWith("Fail")) || (devtools->getStatus().startsWith("Unconnected")))
|
if (devtools->getStatus().startsWith("Fail") || devtools->getStatus().startsWith("Unconnected"))
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s", ERROR_START_CHROME) };
|
return { false, FormatString(L"%s", ERROR_START_CHROME) };
|
||||||
}
|
}
|
||||||
@ -65,22 +85,15 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
targetNodeId = -1;
|
targetNodeId = -1;
|
||||||
pageenabled = -1;
|
pageenabled = -1;
|
||||||
useragentflag = -1;
|
useragentflag = -1;
|
||||||
|
update = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add spaces near ellipsis for better translation and check for quotes
|
// Erase tags and reduce the number of ellipsis for better translation
|
||||||
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230));
|
qtext.remove(QRegExp("<[^>]*>"));
|
||||||
qtext.replace(QRegularExpression("[" + QString(8230) + "]" + "[" + QString(8230) + "]"), QString(8230));
|
qtext.replace(QRegExp("(" + QString(8230) + ")+"), " " + QString(8230));
|
||||||
qtext.replace(QRegularExpression("[" + QString(8230) + "]"), " " + QString(8230) + " ");
|
|
||||||
bool checkquote = false;
|
|
||||||
if ((qtext.front() == QString(12300)) && (qtext.back() == QString(12301)))
|
|
||||||
{
|
|
||||||
checkquote = true;
|
|
||||||
qtext.remove(0, 1);
|
|
||||||
qtext.chop(1);
|
|
||||||
}
|
|
||||||
QJsonObject root;
|
|
||||||
|
|
||||||
// Enable page feedback
|
// Enable page feedback
|
||||||
|
QJsonObject root;
|
||||||
if (pageenabled == -1)
|
if (pageenabled == -1)
|
||||||
{
|
{
|
||||||
if (!devtools->SendRequest("Page.enable", {}, root))
|
if (!devtools->SendRequest("Page.enable", {}, root))
|
||||||
@ -94,9 +107,9 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
if (useragentflag == -1)
|
if (useragentflag == -1)
|
||||||
{
|
{
|
||||||
QString useragent = devtools->getUserAgent();
|
QString useragent = devtools->getUserAgent();
|
||||||
useragent.replace(QRegularExpression("HeadlessChrome"), "Chrome");
|
|
||||||
if (!useragent.isEmpty())
|
if (!useragent.isEmpty())
|
||||||
{
|
{
|
||||||
|
useragent.replace("HeadlessChrome", "Chrome");
|
||||||
if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root))
|
if (!devtools->SendRequest("Network.setUserAgentOverride", { {"userAgent", useragent} }, root))
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
@ -107,11 +120,18 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
|
|
||||||
long navigate = devtools->methodToReceive("Page.navigatedWithinDocument");
|
long navigate = devtools->methodToReceive("Page.navigatedWithinDocument");
|
||||||
long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } });
|
long target = devtools->methodToReceive("DOM.attributeModified", { { "value" , "lmt__mobile_share_container" } });
|
||||||
|
if (update == -1)
|
||||||
|
{
|
||||||
|
update = devtools->methodToReceive("DOM.documentUpdated");
|
||||||
|
}
|
||||||
|
|
||||||
// Navigate to site
|
// Navigate to site
|
||||||
QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext;
|
QString fullurl = URL + "#ja/" + S(translateTo.Copy()) + "/" + qtext;
|
||||||
if (devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root))
|
if (!devtools->SendRequest("Page.navigate", { {"url", fullurl} }, root))
|
||||||
{
|
{
|
||||||
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
|
}
|
||||||
|
|
||||||
// Wait until page is loaded
|
// Wait until page is loaded
|
||||||
float timer = 0;
|
float timer = 0;
|
||||||
int timer_stop = 10;
|
int timer_stop = 10;
|
||||||
@ -125,22 +145,29 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if document is outdated
|
||||||
|
if (devtools->checkMethod(update))
|
||||||
|
{
|
||||||
|
docfound = -1;
|
||||||
|
targetNodeId = -1;
|
||||||
|
update = -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Get document
|
// Get document
|
||||||
if (docfound == -1)
|
if (docfound == -1)
|
||||||
{
|
{
|
||||||
if (!devtools->SendRequest("DOM.getDocument", {}, root))
|
if (!devtools->SendRequest("DOM.getDocument", {}, root))
|
||||||
{
|
{
|
||||||
docfound = -1;
|
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
}
|
}
|
||||||
docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
|
docfound = root.value("result").toObject().value("root").toObject().value("nodeId").toInt();
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get target selector
|
// Get target selector
|
||||||
if (targetNodeId == -1)
|
if (targetNodeId == -1)
|
||||||
{
|
{
|
||||||
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root))
|
if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "textarea.lmt__target_textarea"} }, root)
|
||||||
|| (root.value("result").toObject().value("nodeId").toInt() == 0))
|
|| root.value("result").toObject().value("nodeId").toInt() == 0)
|
||||||
{
|
{
|
||||||
docfound = -1;
|
docfound = -1;
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
@ -157,14 +184,19 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Catch the translation
|
// Catch the translation
|
||||||
devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root);
|
if (!devtools->SendRequest("DOM.getOuterHTML", { {"nodeId", targetNodeId + 1} }, root))
|
||||||
|
{
|
||||||
|
docfound = -1;
|
||||||
|
targetNodeId = -1;
|
||||||
|
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
||||||
|
}
|
||||||
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
|
QString OuterHTML = root.value("result").toObject().value("outerHTML").toString();
|
||||||
if (OuterHTML == "<div></div>")
|
if (OuterHTML == "<div></div>")
|
||||||
{
|
{
|
||||||
// Try to catch the notification
|
// Try to catch the notification
|
||||||
int noteNodeId = -1;
|
int noteNodeId = -1;
|
||||||
if (!(devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root))
|
if (!devtools->SendRequest("DOM.querySelector", { {"nodeId", docfound}, {"selector", "div.lmt__system_notification"} }, root)
|
||||||
|| (root.value("result").toObject().value("nodeId").toInt() == 0))
|
|| root.value("result").toObject().value("nodeId").toInt() == 0)
|
||||||
{
|
{
|
||||||
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
return { false, FormatString(L"%s: %d ", ERROR_GOT_TIMEOUT, timer_stop) };
|
||||||
}
|
}
|
||||||
@ -200,15 +232,5 @@ std::pair<bool, std::wstring> Translate(const std::wstring& text, DevTools* devt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get quotes back
|
|
||||||
if (checkquote)
|
|
||||||
{
|
|
||||||
OuterHTML = "\"" + OuterHTML + "\"";
|
|
||||||
}
|
|
||||||
return { true, S(OuterHTML) };
|
return { true, S(OuterHTML) };
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return { false, FormatString(L"%s", ERROR_COMMAND_FAIL) };
|
|
||||||
}
|
|
||||||
}
|
}
|
@ -50,6 +50,17 @@ void SaveCache()
|
|||||||
savedSize = translationCache->size();
|
savedSize = translationCache->size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Strips characters that should not be sent to the translator from `text`, in place:
// line feed, carriage return, horizontal tab, and the code points 4 and 5.
// All remaining characters keep their original relative order.
void EraseControlCharacters(std::wstring& text)
{
	// Build the filtered string in one pass instead of erasing while iterating.
	// The earlier loop called text.erase(it--), which stepped the iterator before
	// begin() whenever the very first character had to be removed (undefined
	// behavior) and kept using an iterator that erase() may have invalidated.
	std::wstring cleaned;
	cleaned.reserve(text.size());
	for (const wchar_t ch : text)
	{
		const bool isControl = ch == L'\n' || ch == L'\r' || ch == L'\t' || ch == 4 || ch == 5;
		if (!isControl)
		{
			cleaned.push_back(ch);
		}
	}
	text.swap(cleaned);
}
|
||||||
|
|
||||||
class Window : public QDialog
|
class Window : public QDialog
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -184,7 +195,11 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
|
|||||||
if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b";
|
if (auto it = translationCache->find(sentence); it != translationCache->end()) translation = it->second + L"\x200b";
|
||||||
}
|
}
|
||||||
if (translation.empty() && (sentenceInfo["current select"]))
|
if (translation.empty() && (sentenceInfo["current select"]))
|
||||||
|
{
|
||||||
|
EraseControlCharacters(sentence);
|
||||||
std::tie(cache, translation) = Translate(sentence, devtools);
|
std::tie(cache, translation) = Translate(sentence, devtools);
|
||||||
|
}
|
||||||
|
|
||||||
if (cache) translationCache->try_emplace(sentence, translation);
|
if (cache) translationCache->try_emplace(sentence, translation);
|
||||||
if (cache && translationCache->size() > savedSize + 50) SaveCache();
|
if (cache && translationCache->size() > savedSize + 50) SaveCache();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user