From a7f98301a152030c1abc9fe147bf646b4199e8dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=81=8D=E5=85=AE=E6=83=9A=E5=85=AE?= <1173718158@qq.com> Date: Mon, 6 Jan 2025 20:44:12 +0800 Subject: [PATCH] . --- cpp/LunaHook/LunaHook/engine32/Debonosu.cpp | 7 +- cpp/LunaHook/LunaHook/engine32/Elf.cpp | 4 +- cpp/LunaHook/LunaHook/engine32/Escude.cpp | 5 +- cpp/LunaHook/LunaHook/engine32/LunaSoft.cpp | 7 +- cpp/LunaHook/LunaHook/engine32/Malie.cpp | 5 +- cpp/LunaHook/LunaHook/engine32/Mink.cpp | 136 ++++++++++++------ cpp/LunaHook/LunaHook/engine32/Mink.h | 46 ++++-- cpp/LunaHook/LunaHook/engine32/QLIE.cpp | 6 +- cpp/LunaHook/LunaHook/engine32/Siglus.cpp | 8 -- cpp/LunaHook/LunaHook/engine32/Taskforce2.cpp | 8 +- cpp/LunaHook/LunaHook/engine32/Unicorn.cpp | 5 +- cpp/LunaHook/LunaHook/engine32/WillPlus.cpp | 2 +- cpp/LunaHook/LunaHook/enginecollection32.cpp | 3 +- cpp/version.cmake | 2 +- docs/en/useapis/ocrapi.md | 4 + docs/ja/useapis/ocrapi.md | 4 + docs/zh/useapis/ocrapi.md | 4 + py/LunaTranslator/gui/setting_about.py | 5 +- py/LunaTranslator/ocrengines/tesseract5.py | 123 ++++++++++++---- py/files/defaultconfig/ocrsetting.json | 21 --- 20 files changed, 257 insertions(+), 148 deletions(-) diff --git a/cpp/LunaHook/LunaHook/engine32/Debonosu.cpp b/cpp/LunaHook/LunaHook/engine32/Debonosu.cpp index e6f5d643..e58b63a1 100644 --- a/cpp/LunaHook/LunaHook/engine32/Debonosu.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Debonosu.cpp @@ -23,16 +23,15 @@ namespace } void embed_fun(hook_context *s, TextBuffer buffer) { - static std::string ts; - ts = buffer.viewA(); + auto ts = allocateString(buffer.viewA()); if (_type == 1) { - s->stack[1] = (DWORD)ts.c_str(); + s->stack[1] = (DWORD)ts; } else { - s->ecx = (DWORD)ts.c_str(); + s->ecx = (DWORD)ts; } } bool InsertDebonosuScenarioHook() diff --git a/cpp/LunaHook/LunaHook/engine32/Elf.cpp b/cpp/LunaHook/LunaHook/engine32/Elf.cpp index 0cef6d33..63245f0a 100644 --- a/cpp/LunaHook/LunaHook/engine32/Elf.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Elf.cpp @@ -211,7 +211,6 @@ namespace char nameText[1]; // +4*10+4*3, could be bad address though }; - std::string data_; TextArgument *scenarioArg_, *nameArg_; LPCSTR scenarioText_; @@ -265,10 +264,9 @@ namespace auto text = arg->scenarioText; if (!Engine::isAddressReadable(text)) return; - data_ = newData; scenarioArg_ = arg; scenarioText_ = arg->scenarioText; - arg->scenarioText = (LPCSTR)data_.c_str(); + arg->scenarioText = (LPCSTR)allocateString(newData); } else if (arg->nameFlag == 0) { diff --git a/cpp/LunaHook/LunaHook/engine32/Escude.cpp b/cpp/LunaHook/LunaHook/engine32/Escude.cpp index 87460537..8f17b152 100644 --- a/cpp/LunaHook/LunaHook/engine32/Escude.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Escude.cpp @@ -218,12 +218,11 @@ namespace } void embed_fun(hook_context *s, TextBuffer buffer) { - static std::string data_; - data_ = buffer.strA(); + auto data_ = buffer.strA(); auto arg = (HookArgument *)s->stack[1]; if (trimmedText != arg->text) data_.insert(0, std::string(arg->text, trimmedText - arg->text)); - arg->text = data_.c_str(); + arg->text = allocateString(data_); } } // unnamed namespace bool InsertEscudeHook() diff --git a/cpp/LunaHook/LunaHook/engine32/LunaSoft.cpp b/cpp/LunaHook/LunaHook/engine32/LunaSoft.cpp index b60af9d2..c35a0ad3 100644 --- a/cpp/LunaHook/LunaHook/engine32/LunaSoft.cpp +++ b/cpp/LunaHook/LunaHook/engine32/LunaSoft.cpp @@ -176,10 +176,9 @@ namespace } void hookafter1(hook_context *s, TextBuffer buffer) { - static std::string newData; - newData = buffer.strA(); - newData = cache_.put(newData).first; - s->stack[1] = (ULONG)newData.c_str(); // arg1 + auto newData = buffer.strA(); + cache_.put(newData); + s->stack[1] = (ULONG)allocateString(newData); // arg1 } } // namespace Private diff --git a/cpp/LunaHook/LunaHook/engine32/Malie.cpp b/cpp/LunaHook/LunaHook/engine32/Malie.cpp index a7942888..59f49db4 100644 --- a/cpp/LunaHook/LunaHook/engine32/Malie.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Malie.cpp @@ -1113,7 +1113,6 @@ namespace // I need a cache retainer here to make sure same text result in same result void hookafter(hook_context *s, TextBuffer buffer) { - static std::string data_; static std::unordered_set hashes_; auto text = (LPCWSTR)s->stack[1]; if (!text || !*text || !(text[0] == 0x7 && text[1] == 0x8) && all_ascii(text)) @@ -1171,10 +1170,8 @@ namespace data.push_back(0); data.push_back(0); data.push_back(0); - data_ = data; - text = (LPCWSTR)data_.c_str(); - s->stack[1] = (ULONG)text; + s->stack[1] = (ULONG)allocateString(data); } } void hookBefore(hook_context *s, HookParam *hp, TextBuffer *buffer, uintptr_t *role) diff --git a/cpp/LunaHook/LunaHook/engine32/Mink.cpp b/cpp/LunaHook/LunaHook/engine32/Mink.cpp index 907b33f8..d9cf9ee6 100644 --- a/cpp/LunaHook/LunaHook/engine32/Mink.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Mink.cpp @@ -1,4 +1,4 @@ -#include"Mink.h" +#include "Mink.h" /** 12/23/2014 jichi: Mink games (not sure the engine name) * Sample game: * - [130111] [Mink EGO] お�ちも�にはぜったい言えなぁ�ぁ�つなこと�-- /HB-4*0:64@45164A @@ -103,7 +103,7 @@ * 004516dd 8a4c11 1d mov cl,byte ptr ds:[ecx+edx+0x1d] */ -#if 0 // hook to the caller of dynamic GetGlyphOutlineA +#if 0 // hook to the caller of dynamic GetGlyphOutlineA /** * @param addr function address * @param frame real address of the function, supposed to be the same as addr @@ -136,7 +136,7 @@ static bool InsertMinkDynamicHook(LPVOID fun, DWORD frame, DWORD stack) static void SpecialHookMink(hook_context *context, HookParam *hp, TextBuffer *buffer, uintptr_t *split) { - //DWORD addr = *(DWORD *)(esp_base + hp->offset); // default value + // DWORD addr = *(DWORD *)(esp_base + hp->offset); // default value DWORD addr = context->eax; if (!IthGetMemoryRange((LPVOID)(addr), 0, 0)) return; @@ -154,64 +154,110 @@ static void SpecialHookMink(hook_context *context, HookParam *hp, TextBuffer *bu bool InsertMinkHook() { const BYTE bytes[] = { - 0x38,0x18, // 00451648 3818 cmp byte ptr ds:[eax],bl - 0x75, 0x14, // 0045164a 75 14 jnz short .00451660 ; jichi: hook here - 0x38,0x5d, 0xf4, // 0045164c 385d f4 cmp byte ptr ss:[ebp-0xc],bl - 0x74, 0x07, // 0045164f 74 07 je short .00451658 - 0x8b,0x45, 0xf0, // 00451651 8b45 f0 mov eax,dword ptr ss:[ebp-0x10] - 0x83,0x60, 0x70, 0xfd, // 00451654 8360 70 fd and dword ptr ds:[eax+0x70],0xfffffffd - 0x8b,0x45, 0x08 // 00451658 8b45 08 mov eax,dword ptr ss:[ebp+0x8] + 0x38, 0x18, // 00451648 3818 cmp byte ptr ds:[eax],bl + 0x75, 0x14, // 0045164a 75 14 jnz short .00451660 ; jichi: hook here + 0x38, 0x5d, 0xf4, // 0045164c 385d f4 cmp byte ptr ss:[ebp-0xc],bl + 0x74, 0x07, // 0045164f 74 07 je short .00451658 + 0x8b, 0x45, 0xf0, // 00451651 8b45 f0 mov eax,dword ptr ss:[ebp-0x10] + 0x83, 0x60, 0x70, 0xfd, // 00451654 8360 70 fd and dword ptr ds:[eax+0x70],0xfffffffd + 0x8b, 0x45, 0x08 // 00451658 8b45 08 mov eax,dword ptr ss:[ebp+0x8] + }; + enum + { + addr_offset = 2 }; - enum { addr_offset = 2 }; ULONG addr = MemDbg::findBytes(bytes, sizeof(bytes), processStartAddress, processStopAddress); - //ULONG addr = 0x45164a; - //ULONG addr = 0x451648; - //ULONG addr = 0x4521a8; - //GROWL_DWORD(addr); - if (!addr) { + // ULONG addr = 0x45164a; + // ULONG addr = 0x451648; + // ULONG addr = 0x4521a8; + // GROWL_DWORD(addr); + if (!addr) + { ConsoleOutput("Mink: pattern not found"); return false; } HookParam hp; hp.address = addr + addr_offset; - hp.offset=regoffset(eax); // -8 + hp.offset = regoffset(eax); // -8 hp.split = 0x64; - hp.type = USING_SPLIT|DATA_INDIRECT|USING_CHAR; // 0x18 + hp.type = USING_SPLIT | DATA_INDIRECT | USING_CHAR; // 0x18 hp.text_fun = SpecialHookMink; ConsoleOutput("INSERT Mink"); return NewHook(hp, "Mink"); - //ConsoleOutput("Mink: disable GDI hooks"); + // ConsoleOutput("Mink: disable GDI hooks"); // } -bool Mink2::attach_function() { +bool Mink2::attach_function() +{ const BYTE pattern[] = { - //破談屋 - //https://vndb.org/v2719 - 0xF7,0xC7,0x03,0x00,0x00,0x00, - 0x75,XX, - 0xC1,0xE9,0x02, - 0x83,0xE2,0x03, - 0x83,0xF9,0x08, - 0x72,XX - }; - bool found=false; + // 破談屋 + // https://vndb.org/v2719 + 0xF7, 0xC7, 0x03, 0x00, 0x00, 0x00, + 0x75, XX, + 0xC1, 0xE9, 0x02, + 0x83, 0xE2, 0x03, + 0x83, 0xF9, 0x08, + 0x72, XX}; + bool found = false; for (auto addr : Util::SearchMemory(pattern, sizeof(pattern), PAGE_EXECUTE, processStartAddress, processStopAddress)) { - addr = MemDbg::findEnclosingAlignedFunction(addr,0x100); - if (addr == 0)return false; - HookParam hp; - hp.address = addr; - hp.offset=stackoffset(2); - hp.length_offset=3; - hp.type = USING_STRING; - found|=NewHook(hp, "Mink"); - } - return found; -} -bool Mink::attach_function() { - - return InsertMinkHook(); -} \ No newline at end of file + addr = MemDbg::findEnclosingAlignedFunction(addr, 0x100); + if (addr == 0) + return false; + HookParam hp; + hp.address = addr; + hp.offset = stackoffset(2); + hp.length_offset = 3; + hp.type = USING_STRING; + found |= NewHook(hp, "Mink"); + } + return found; +} +bool Mink::attach_function() +{ + + return InsertMinkHook(); +} + +bool Mink3::attach_function() +{ + const BYTE pattern[] = { + // 夜勤病棟 復刻版+ + 0xff, 0x15, XX4, + 0x33, 0xdb, + 0x89, 0x44, 0x24, XX, + 0x85, 0xc0, + 0x7e, XX, + 0x8a, 0x07, + 0x8d, 0x4c, 0x24, 0x10, + 0x50, + 0xe8, XX4, + 0x83, 0xf8, 0x02, + 0x75, 0x08, + 0x03, 0xd8, + 0x03, 0xf8, + 0x03, 0xf0, + 0xeb, XX, + 0x57, + 0x8b, 0xcd, + 0xe8, XX4, + 0x25, 0xff, 0x00, 0x00, 0x00, + 0x83, 0xe8, 0x00}; + auto addr = MemDbg::findBytes(pattern, sizeof(pattern), processStartAddress, processStopAddress); + if (!addr) + return false; + addr = MemDbg::findEnclosingAlignedFunction(addr, 0x100); + if (!addr) + return false; + HookParam hp; + hp.address = addr; + hp.offset = stackoffset(1); + hp.type = USING_STRING | EMBED_ABLE | EMBED_AFTER_OVERWRITE | EMBED_DYNA_SJIS; + hp.embed_hook_font = F_TextOutA; + hp.lineSeparator = L"\\n"; + PcHooks::hookGDIFunctions(TextOutA); + return NewHook(hp, "Mink"); +} \ No newline at end of file diff --git a/cpp/LunaHook/LunaHook/engine32/Mink.h b/cpp/LunaHook/LunaHook/engine32/Mink.h index c265d1ab..b0bba554 100644 --- a/cpp/LunaHook/LunaHook/engine32/Mink.h +++ b/cpp/LunaHook/LunaHook/engine32/Mink.h @@ -1,22 +1,38 @@ -class Mink:public ENGINE{ - public: - Mink(){ - - check_by=CHECK_BY::FILE; - check_by_target=L"*.at2";//Mink, sample files: voice.at2, voice.det, voice.nme +class Mink : public ENGINE +{ +public: + Mink() + { + + check_by = CHECK_BY::FILE; + check_by_target = L"*.at2"; // Mink, sample files: voice.at2, voice.det, voice.nme }; - bool attach_function(); + bool attach_function(); }; -class Mink2:public ENGINE{ - public: - Mink2(){ - - check_by=CHECK_BY::FILE; - check_by_target=L"Scr\\*.sc"; - is_engine_certain=false; +class Mink2 : public ENGINE +{ +public: + Mink2() + { + + check_by = CHECK_BY::FILE; + check_by_target = L"Scr\\*.sc"; + is_engine_certain = false; }; - bool attach_function(); + bool attach_function(); +}; +class Mink3 : public ENGINE +{ +public: + Mink3() + { + // 夜勤病棟 復刻版+ + check_by = CHECK_BY::FILE_ALL; + check_by_target = check_by_list{L"voice*.dat", L"tpd.dat", L"tms.dat", L"thm.dat", L"se.dat", L"scr.dat", L"rec.dat", L"bgm.dat", L"cgm.dat", L"gpd.dat", L"gpdtp.dat", L"mov.dat", L"msk.dat", L"msktp.dat", L"read.dat"}; + is_engine_certain = false; + }; + bool attach_function(); }; \ No newline at end of file diff --git a/cpp/LunaHook/LunaHook/engine32/QLIE.cpp b/cpp/LunaHook/LunaHook/engine32/QLIE.cpp index c0b0521b..29378470 100644 --- a/cpp/LunaHook/LunaHook/engine32/QLIE.cpp +++ b/cpp/LunaHook/LunaHook/engine32/QLIE.cpp @@ -746,10 +746,8 @@ namespace newData = newData + "[n]"; else if (endtype == 2) newData = newData + "[c]"; - static std::string data_; - data_ = newData; - s->edx = (ULONG)data_.c_str(); // reset arg1 - *(DWORD *)(s->edx - 4) = data_.size(); + s->edx = (ULONG)allocateString(newData); // reset arg1 + *(DWORD *)(s->edx - 4) = newData.size(); // arg->size = data_.size(); // no idea why this will crash ... //*(DWORD *)(s->edx - 4) = newData.size() + trimmedText - text; diff --git a/cpp/LunaHook/LunaHook/engine32/Siglus.cpp b/cpp/LunaHook/LunaHook/engine32/Siglus.cpp index 97e126d4..f5fbf88d 100644 --- a/cpp/LunaHook/LunaHook/engine32/Siglus.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Siglus.cpp @@ -1764,11 +1764,7 @@ namespace void hookafter(hook_context *s, TextBuffer buffer) { auto arg = (TextUnionW *)(type_ == Type1 ? s->ecx : s->stack[1]); - auto argValue = *arg; arg->setText(buffer.viewW()); - - // Restoring is indispensible, and as a result, the default hook does not work - //*arg = argValue; } } bool attach(ULONG startAddress, ULONG stopAddress) // attach scenario @@ -1794,8 +1790,6 @@ namespace OtherHook namespace Private { - TextUnionW *arg_, - argValue_; void hookBefore(hook_context *s, HookParam *hp, TextBuffer *buffer, uintptr_t *role) { static std::wstring text_; @@ -1833,8 +1827,6 @@ namespace OtherHook void hookafter2(hook_context *s, TextBuffer buffer) { auto arg = (TextUnionW *)s->stack[0]; - arg_ = arg; - argValue_ = *arg; arg->setText(buffer.viewW()); } diff --git a/cpp/LunaHook/LunaHook/engine32/Taskforce2.cpp b/cpp/LunaHook/LunaHook/engine32/Taskforce2.cpp index 133f697e..abe6722b 100644 --- a/cpp/LunaHook/LunaHook/engine32/Taskforce2.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Taskforce2.cpp @@ -211,13 +211,11 @@ namespace } void hookafter(hook_context *s, TextBuffer buffer) { - static std::string data_; std::string newData = buffer.strA(); - data_ = newData; int capacity = s->stack[1]; // arg 2, should always be 0x1000 - if (data_.size() >= capacity) - data_ = data_.substr(0, capacity - 1); - s->stack[2] = (ULONG)data_.c_str(); // arg 3 + if (newData.size() >= capacity) + newData = newData.substr(0, capacity - 1); + s->stack[2] = (ULONG)allocateString(newData); // arg 3 } } // namespace Private diff --git a/cpp/LunaHook/LunaHook/engine32/Unicorn.cpp b/cpp/LunaHook/LunaHook/engine32/Unicorn.cpp index 24a23617..7c6443a6 100644 --- a/cpp/LunaHook/LunaHook/engine32/Unicorn.cpp +++ b/cpp/LunaHook/LunaHook/engine32/Unicorn.cpp @@ -575,9 +575,8 @@ namespace void hookafter(hook_context *s, TextBuffer buffer) { - static std::string data_; - data_ = buffer.strA(); - s->stack[1] = (ULONG)data_.c_str(); + auto data_ = buffer.strA(); + s->stack[1] = (ULONG)allocateString(data_); s->stack[2] = data_.size(); } } // namespace Private diff --git a/cpp/LunaHook/LunaHook/engine32/WillPlus.cpp b/cpp/LunaHook/LunaHook/engine32/WillPlus.cpp index 63713042..bf63b0ae 100644 --- a/cpp/LunaHook/LunaHook/engine32/WillPlus.cpp +++ b/cpp/LunaHook/LunaHook/engine32/WillPlus.cpp @@ -834,7 +834,7 @@ namespace if (suffixSize) newText.append(std::wstring(trimmedText + trimmedSize, suffixSize)); info->text_ = newText; - s->stack[info->stackIndex_] = (ULONG)info->text_.c_str(); + s->stack[info->stackIndex_] = (ULONG)allocateString(info->text_); } // explicit TextHookW(int hookStackIndex, int role = Engine::UnknownRole) : stackIndex_(hookStackIndex), role_(role) {} template diff --git a/cpp/LunaHook/LunaHook/enginecollection32.cpp b/cpp/LunaHook/LunaHook/enginecollection32.cpp index e7b9bdef..8757658d 100644 --- a/cpp/LunaHook/LunaHook/enginecollection32.cpp +++ b/cpp/LunaHook/LunaHook/enginecollection32.cpp @@ -428,6 +428,7 @@ std::vector check_engines() new DAC, new AbogadoPowers, new e_Erekiteru, - new H_do_C + new H_do_C, + new Mink3, }; } \ No newline at end of file diff --git a/cpp/version.cmake b/cpp/version.cmake index 8b7bdc3a..9aac2a28 100644 --- a/cpp/version.cmake +++ b/cpp/version.cmake @@ -1,7 +1,7 @@ set(VERSION_MAJOR 6) set(VERSION_MINOR 17) -set(VERSION_PATCH 4) +set(VERSION_PATCH 5) set(VERSION_REVISION 0) set(LUNA_VERSION "{${VERSION_MAJOR},${VERSION_MINOR},${VERSION_PATCH},${VERSION_REVISION}}") add_library(VERSION_DEF ${CMAKE_CURRENT_LIST_DIR}/version_def.cpp) diff --git a/docs/en/useapis/ocrapi.md b/docs/en/useapis/ocrapi.md index 416084cf..48d1da8e 100644 --- a/docs/en/useapis/ocrapi.md +++ b/docs/en/useapis/ocrapi.md @@ -245,4 +245,8 @@ The following commands remove the OCR pack for "en-US": https://learn.microsoft.com/en-us/windows/powertoys/text-extractor#supported-languages +#### **Tesseract5** + +https://github.com/tesseract-ocr/tesseract/releases + \ No newline at end of file diff --git a/docs/ja/useapis/ocrapi.md b/docs/ja/useapis/ocrapi.md index 21eb79a2..8c6d64a6 100644 --- a/docs/ja/useapis/ocrapi.md +++ b/docs/ja/useapis/ocrapi.md @@ -246,4 +246,8 @@ State : NotPresent https://learn.microsoft.com/ja-jp/windows/powertoys/text-extractor#supported-languages +#### **Tesseract5** + +https://github.com/tesseract-ocr/tesseract/releases + diff --git a/docs/zh/useapis/ocrapi.md b/docs/zh/useapis/ocrapi.md index 1ae1765a..6236e9b0 100644 --- a/docs/zh/useapis/ocrapi.md +++ b/docs/zh/useapis/ocrapi.md @@ -281,5 +281,9 @@ State : NotPresent https://learn.microsoft.com/zh-cn/windows/powertoys/text-extractor#supported-languages +#### **Tesseract5** + +https://github.com/tesseract-ocr/tesseract/releases + diff --git a/py/LunaTranslator/gui/setting_about.py b/py/LunaTranslator/gui/setting_about.py index d0665efd..59891bb3 100644 --- a/py/LunaTranslator/gui/setting_about.py +++ b/py/LunaTranslator/gui/setting_about.py @@ -294,7 +294,10 @@ def delayloadlinks(key, lay): else: for link in source["links"]: __grid.append( - [link["name"], (makehtml(link["link"]), 2, "link")] + [ + link["name"], + (makehtml(link["link"], link.get("vis", None)), 2, "link"), + ] + ([link.get("about")] if link.get("about") else []) ) grid.append( diff --git a/py/LunaTranslator/ocrengines/tesseract5.py b/py/LunaTranslator/ocrengines/tesseract5.py index 77efbcf9..8b0f9e05 100644 --- a/py/LunaTranslator/ocrengines/tesseract5.py +++ b/py/LunaTranslator/ocrengines/tesseract5.py @@ -1,37 +1,110 @@ -import os, uuid, gobject -from myutils.config import _TR, ocrsetting +import os, uuid, gobject, winreg +from myutils.config import _TR, globalconfig from ocrengines.baseocrclass import baseocr from myutils.subproc import subproc_w - - -def list_langs(): - path = ocrsetting["tesseract5"]["args"]["路径"] - if os.path.exists(path) == False: - return [] - res = subproc_w( - '"{}" --list-langs'.format(path), needstdio=True, run=True, encoding="utf8" - ).stdout - return res.split("\n")[1:-1] +from language import Languages class OCR(baseocr): + + def findts__(self): + k = winreg.OpenKeyEx( + winreg.HKEY_LOCAL_MACHINE, + r"SOFTWARE\Tesseract-OCR", + 0, + winreg.KEY_QUERY_VALUE, + ) + base = winreg.QueryValueEx(k, "Path")[0] + winreg.CloseKey(k) + return base + + def findts(self): + try: + _ = self.findts__() + _ = os.path.join(_, "tesseract.exe") + return _ + except: + return + + def list_langs(self): + if not (self.path and os.path.exists(self.path)): + raise Exception(_TR("路径不存在")) + res = subproc_w( + '"{}" --list-langs'.format(self.path), + needstdio=True, + run=True, + encoding="utf8", + ).stdout + return res.split("\n")[1:-1] + + def langmap(self): + # https://github.com/tesseract-ocr/tessdoc/blob/main/tess3/Data-Files.md + return { + Languages.Chinese: "chi_sim", + Languages.TradChinese: "chi_tra", + Languages.Japanese: "jpn", + Languages.English: "eng", + Languages.Russian: "rus", + Languages.Korean: "kor", + Languages.Arabic: "ara", + Languages.Italian: "ita", + Languages.Polish: "pol", + Languages.Spanish: "spa", + Languages.Swedish: "swe", + Languages.Ukrainian: "ukr", + Languages.Vietnamese: "vie", + Languages.French: "fra", + Languages.Turkish: "tur", + Languages.German: "deu", + Languages.Dutch: "nld", + Languages.Portuguese: "por", + Languages.Czech: "ces", + Languages.Hungarian: "hun", + Languages.Thai: "tha", + Languages.Latin: "lat", + } + def initocr(self): - self.langs = list_langs() + self.path = self.findts() + self.langs = self.list_langs() + print(self.langs) def ocr(self, imagebinary): - self.checkempty(["路径"]) - path = self.config["路径"] - if os.path.exists(path) == False: - raise Exception(_TR("路径不存在")) - - fname = gobject.gettempdir(str(uuid.uuid4()) + ".png") - with open(fname, "wb") as ff: - ff.write(imagebinary) - imgfile = os.path.abspath(fname) + if not (self.path and os.path.exists(self.path)): + raise Exception(_TR("not installed")) + self.raise_cant_be_auto_lang() + lang = self.srclang + psm = 6 + imgfile = None + if globalconfig["verticalocr"] == 0: + pass + elif globalconfig["verticalocr"] == 1: + lang += "_vert" + psm = 5 + elif globalconfig["verticalocr"] == 2: + fname = gobject.gettempdir(str(uuid.uuid4()) + ".png") + with open(fname, "wb") as ff: + ff.write(imagebinary) + imgfile = os.path.abspath(fname) + _ = subproc_w( + '"{}" "{}" stdout -l osd --psm 0'.format(self.path, imgfile), + needstdio=True, + encoding="utf8", + run=True, + ) + err = _.stderr + if len(err): + pass + elif "Orientation in degrees: 0" not in _.stdout: + lang += "_vert" + psm = 5 + if not imgfile: + fname = gobject.gettempdir(str(uuid.uuid4()) + ".png") + with open(fname, "wb") as ff: + ff.write(imagebinary) + imgfile = os.path.abspath(fname) _ = subproc_w( - '"{}" "{}" - -l {} {}'.format( - path, imgfile, self.langs[self.config["语言"]], self.config["附加参数"] - ), + '"{}" "{}" - -l {} --psm {}'.format(self.path, imgfile, lang, psm), needstdio=True, encoding="utf8", run=True, diff --git a/py/files/defaultconfig/ocrsetting.json b/py/files/defaultconfig/ocrsetting.json index 3ba00bc8..29b4487a 100644 --- a/py/files/defaultconfig/ocrsetting.json +++ b/py/files/defaultconfig/ocrsetting.json @@ -150,27 +150,6 @@ "Secret Access Key": "" } }, - "tesseract5": { - "args": { - "路径": "", - "语言": 0, - "附加参数": "--psm 6" - }, - "argstype": { - "路径": { - "type": "file", - "dir": false, - "filter": "tesseract.exe" - }, - "语言": { - "type": "combo", - "list_function": [ - "ocrengines.tesseract5", - "list_langs" - ] - } - } - }, "googlecloudvision": { "args": { "key": ""