// Textractor/extensions/deepltranslate.cpp

#include "qtcommon.h"
#include "translatewrapper.h"
#include "network.h"
#include <random>
// Error prefix used in the failure messages built by Translate(); declared here, defined in another translation unit.
extern const wchar_t* TRANSLATION_ERROR;
// Name of this translation provider. NOTE(review): presumably read by the shared translate wrapper - confirm.
const char* TRANSLATION_PROVIDER = "DeepL Translate";
// URL of the DeepL page where an API key can be obtained.
const char* GET_API_KEY_FROM = "https://www.deepl.com/pro.html#developer";
// Language names offered as translation targets (languagesTo) and as translation
// sources (languagesFrom). Every name listed here is also a key of `codes`, which
// Translate() uses to turn the selected name into a DeepL language code.
// NOTE(review): "Norwegian Bokm<6B>l" looks like a mis-encoded "Norwegian Bokmal" with a
// ring above the 'a'. If it is corrected, the identical key in `codes` has to be
// changed in the same way, otherwise the codes.at() lookup in Translate() will throw.
extern const QStringList languagesTo
{
	"Arabic",
	"Bulgarian",
	"Czech",
	"Danish",
	"German",
	"Greek",
	"English (backward compatibility)",
	"English (British)",
	"English (American)",
	"Spanish",
	"Estonian",
	"Finnish",
	"French",
	"Hungarian",
	"Indonesian",
	"Italian",
	"Japanese",
	"Korean",
	"Lithuanian",
	"Latvian",
	"Norwegian Bokm<6B>l",
	"Dutch",
	"Polish",
	"Portuguese (backward compatibility)",
	"Portuguese (Brazilian)",
	"Portuguese (all Portuguese variants excluding Brazilian Portuguese)",
	"Romanian",
	"Russian",
	"Slovak",
	"Slovenian",
	"Swedish",
	"Turkish",
	"Ukrainian",
	"Chinese (backward compatibility)",
	"Chinese (simplified)",
	"Chinese (traditional)"
},
languagesFrom
{
	"Arabic",
	"Bulgarian",
	"Chinese (all Chinese variants)",
	"Czech",
	"Danish",
	"Dutch",
	"English (all English variants)",
	"Estonian",
	"Finnish",
	"French",
	"German",
	"Greek",
	"Hungarian",
	"Indonesian",
	"Italian",
	"Japanese",
	"Korean",
	"Latvian",
	"Lithuanian",
	"Norwegian Bokm<6B>l",
	"Polish",
	"Portuguese (all Portuguese variants)",
	"Romanian",
	"Russian",
	"Slovak",
	"Slovenian",
	"Spanish",
	"Swedish",
	"Turkish",
	"Ukrainian"
};
// Maps a language display name to the DeepL language code that Translate() sends.
// Source-only names ("... (all ... variants)") and target-only names (regional
// variants, "backward compatibility") share this one table.
// "?" is the placeholder for an unspecified source language and maps to "auto".
// NOTE(review): "Norwegian Bokm<6B>l" looks mis-encoded; it is kept verbatim because it
// must stay identical to the entries in languagesTo / languagesFrom.
extern const std::unordered_map<std::wstring, std::wstring> codes
{
	{ L"Arabic", L"AR" },
	{ L"Bulgarian", L"BG" },
	{ L"Czech", L"CS" },
	{ L"Danish", L"DA" },
	{ L"German", L"DE" },
	{ L"Greek", L"EL" },
	{ L"English (all English variants)", L"EN" },
	{ L"English (backward compatibility)", L"EN" },
	{ L"English (British)", L"EN-GB" },
	{ L"English (American)", L"EN-US" },
	{ L"Spanish", L"ES" },
	{ L"Estonian", L"ET" },
	{ L"Finnish", L"FI" },
	{ L"French", L"FR" },
	{ L"Hungarian", L"HU" },
	{ L"Indonesian", L"ID" },
	{ L"Italian", L"IT" },
	{ L"Japanese", L"JA" },
	{ L"Korean", L"KO" },
	{ L"Lithuanian", L"LT" },
	{ L"Latvian", L"LV" },
	{ L"Norwegian Bokm<6B>l", L"NB" },
	{ L"Dutch", L"NL" },
	{ L"Polish", L"PL" },
	{ L"Portuguese (all Portuguese variants)", L"PT" },
	{ L"Portuguese (backward compatibility)", L"PT" },
	{ L"Portuguese (Brazilian)", L"PT-BR" },
	{ L"Portuguese (all Portuguese variants excluding Brazilian Portuguese)", L"PT-PT" },
	{ L"Romanian", L"RO" },
	{ L"Russian", L"RU" },
	{ L"Slovak", L"SK" },
	{ L"Slovenian", L"SL" },
	{ L"Swedish", L"SV" },
	{ L"Turkish", L"TR" },
	{ L"Ukrainian", L"UK" },
	{ L"Chinese (all Chinese variants)", L"ZH" },
	{ L"Chinese (backward compatibility)", L"ZH" },
	{ L"Chinese (simplified)", L"ZH-HANS" },
	{ L"Chinese (traditional)", L"ZH-HANT" },
	{ L"?", L"auto" }
};
// Default values for the translation settings.
// NOTE(review): these are not read in this file; presumably the shared translate
// wrapper consumes them - confirm against translatewrapper before renaming anything.
bool translateSelectedOnly = true;
bool useRateLimiter = true;
bool rateLimitSelected = true;
bool useCache = true;
bool useFilter = true;
int tokenCount = 10;
int rateLimitTimespan = 60000; // NOTE(review): looks like milliseconds - confirm
int maxSentenceSize = 1000;

// Which authenticated endpoint Translate() tries first: CAT selects /v1/translate,
// REST selects /v2/translate. Translate() flips keyType when the first endpoint's
// response does not contain a translation, so the working one is tried first next time.
enum KeyType { CAT, REST };
int keyType = REST;
std::pair<bool, std::wstring> Translate(const std::wstring& text, TranslationParam tlp)
{
if (!tlp.authKey.empty())
{
std::string translateFromComponent = tlp.translateFrom == L"?" ? "" : "&source_lang=" + WideStringToString(codes.at(tlp.translateFrom));
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
tlp.authKey.find(L":fx") == std::string::npos ? L"api.deepl.com" : L"api-free.deepl.com",
L"POST",
2020-08-12 01:42:24 -06:00
keyType == CAT ? L"/v1/translate" : L"/v2/translate",
FormatString("text=%S&auth_key=%S&target_lang=%S", Escape(text), tlp.authKey, codes.at(tlp.translateTo)) + translateFromComponent,
L"Content-Type: application/x-www-form-urlencoded"
}; httpRequest && (httpRequest.response.find(L"translations") != std::string::npos || (httpRequest = HttpRequest{
2020-08-12 01:42:24 -06:00
L"Mozilla/5.0 Textractor",
tlp.authKey.find(L":fx") == std::string::npos ? L"api.deepl.com" : L"api-free.deepl.com",
2020-08-12 01:42:24 -06:00
L"POST",
(keyType = !keyType) == CAT ? L"/v1/translate" : L"/v2/translate",
FormatString("text=%S&auth_key=%S&target_lang=%S", Escape(text), tlp.authKey, codes.at(tlp.translateTo)) + translateFromComponent,
L"Content-Type: application/x-www-form-urlencoded"
2020-08-12 01:42:24 -06:00
})))
// Response formatted as JSON: translation starts with text":" and ends with "}]
2020-12-14 06:26:01 -07:00
if (auto translation = Copy(JSON::Parse(httpRequest.response)[L"translations"][0][L"text"].String())) return { true, translation.value() };
else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) };
else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) };
}
// the following code was reverse engineered from the DeepL website; it's as close as I could make it but I'm not sure what parts of this could be removed and still have it work
2021-07-01 23:50:47 -06:00
int id = 10000 * std::uniform_int_distribution(0, 9999)(std::random_device()) + 1;
int64_t r = _time64(nullptr), n = std::count(text.begin(), text.end(), L'i') + 1;
// user_preferred_langs? what should priority be? does timestamp do anything? other translation quality options?
auto body = FormatString(R"(
{
"id": %d,
"jsonrpc": "2.0",
"method": "LMT_handle_jobs",
"params": {
"priority": -1,
"timestamp": %lld,
"lang": {
"target_lang": "%.2S",
"source_lang_user_selected": "%S"
},
"jobs": [{
"raw_en_sentence": "%s",
"raw_en_context_before": [],
"kind": "default",
"preferred_num_beams": 1,
"quality": "fast",
"raw_en_context_after": []
}]
}
}
)", id, r + (n - r % n), codes.at(tlp.translateTo), codes.at(tlp.translateFrom), JSON::Escape(WideStringToString(text)));
// missing accept-encoding header since it fucks up HttpRequest
if (HttpRequest httpRequest{
L"Mozilla/5.0 Textractor",
L"www2.deepl.com",
L"POST",
L"/jsonrpc",
body,
2020-10-13 20:19:53 -06:00
L"Host: www2.deepl.com\r\nAccept-Language: en-US,en;q=0.5\r\nContent-type: application/json; charset=utf-8\r\nOrigin: https://www.deepl.com\r\nTE: Trailers",
INTERNET_DEFAULT_PORT,
L"https://www.deepl.com/translator",
2020-08-12 01:42:24 -06:00
WINHTTP_FLAG_SECURE
})
2020-12-14 06:26:01 -07:00
if (auto translation = Copy(JSON::Parse(httpRequest.response)[L"result"][L"translations"][0][L"beams"][0][L"postprocessed_sentence"].String())) return { true, translation.value() };
else return { false, FormatString(L"%s: %s", TRANSLATION_ERROR, httpRequest.response) };
else return { false, FormatString(L"%s (code=%u)", TRANSLATION_ERROR, httpRequest.errorCode) };
}