From f316813283a4ca1d1ea51e6d7defc6cff67c74d9 Mon Sep 17 00:00:00 2001
From: Blu3train
Date: Sun, 17 Dec 2023 18:42:15 +0100
Subject: [PATCH] Ren'py 2.x and 3.x engine hook

Ren'py 2.x, 32-bit (engine.cc): InsertRenpyHook() now hooks both
PyUnicodeUCS2_Format and PyUnicodeUCS2_Replace whenever the export exists
and returns true if at least one hook was inserted. The old text_fun is
replaced by filter_fun callbacks that reject strings containing '%'
(Format hook) or "{fast}" (Replace hook), turn {i} and {/i} into
apostrophes, strip the remaining {...} tags, turn newlines into spaces and
reject a string that is identical to the previous one.

Ren'py 2.x/3.x, 64-bit (match64.cc): adds the pusha register offsets and
the string filter helpers, gives the existing PyUnicodeUCS2_Format hook the
same filter, and adds InsertRenpy3Hook(), which hooks PyUnicode_Format in
python3X.dll / libpython3.X.dll at its entry ("Ren'py 3 unicode") and at
entry + 6 ("Ren'py 3").
---
Review notes (ignored by git am):
 * This copy of the patch was rebuilt from a whitespace-collapsed
   extraction. Line breaks were verified against every hunk header and the
   diffstat; indentation is a best-effort reconstruction, so apply with
   "git apply --ignore-whitespace". The blob ids on the "index" lines
   describe the original post-image, not this revised one.
 * Restored the template arguments of every reinterpret_cast, which the
   extraction had stripped.
 * Fixed without changing any line count: NewLineWideCharFilter now calls
   WideCharFilter; the control-character lists contain 0x13 instead of a
   duplicated 0x12; the "Ren'py 3 unicode" filter stores the OEM byte
   count (not the UTF-16 unit count) and converts exactly *len bytes of
   input; StringCharReplacer receives a narrow '\'' literal; the find()
   result is compared with std::wstring::npos; the && / || condition in
   UnsafeDetermineEngineType is explicitly parenthesised (same meaning).
 * Still open: (1) WideStringFilter(text, len, L"^", 2) over-reads when
   the text ends in '^'; (2) on x64 the filters access the DWORD size
   through a size_t pointer; (3) text_fun receives the stack base and the
   data pointer as DWORD, so addresses above 4 GiB are truncated.

 texthook/engine/engine.cc  |  86 +++++--
 texthook/engine/match64.cc | 478 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 539 insertions(+), 25 deletions(-)

diff --git a/texthook/engine/engine.cc b/texthook/engine/engine.cc
index c2c7abc..26b3bf9 100644
--- a/texthook/engine/engine.cc
+++ b/texthook/engine/engine.cc
@@ -16945,6 +16945,7 @@ bool InsertAdobeFlash10Hook()
  */
 bool InsertRenpyHook()
 {
+	bool ok = false; // becomes true once at least one hook has been inserted
 	wchar_t python[] = L"python2X.dll", libpython[] = L"libpython2.X.dll";
 	for (wchar_t* name : { python, libpython })
 	{
@@ -16957,32 +16958,77 @@ bool InsertRenpyHook()
 				wcscpy_s(spDefault.exportModule, name);
 				HookParam hp = {};
 				hp.address = (DWORD)GetProcAddress(module, "PyUnicodeUCS2_Format");
-				if (!hp.address)
+				if (hp.address)
 				{
-					ConsoleOutput("Textractor: Ren'py failed: failed to find PyUnicodeUCS2_Format");
-					return false;
-				}
-				hp.offset = 4;
-				hp.index = 0xc;
-				hp.length_offset = 0;
-				//hp.split = pusha_ebx_off - 4;
-				hp.text_fun = [](auto, auto, auto, DWORD* data, DWORD* split, DWORD* count)
+					hp.offset = 4;
+					hp.index = 0xc;
+					hp.length_offset = 0;
+					//hp.split = pusha_ebx_off - 4;
+					hp.type = USING_STRING | USING_UNICODE | NO_CONTEXT | DATA_INDIRECT/* | USING_SPLIT*/;
+					hp.filter_fun = [](LPVOID data, DWORD *size, HookParam *, BYTE)
+					{
+						static std::wstring prevText; // last accepted string, used to reject repeats
+						auto text = reinterpret_cast<LPWSTR>(data);
+						auto len = reinterpret_cast<size_t *>(size); // *len is a byte count
+
+						if (cpp_wcsnstr(text, L"%", *len/sizeof(wchar_t))) // reject strings that still contain '%'
+							return false;
+						if (cpp_wcsnstr(text, L"{", *len/sizeof(wchar_t))) {
+							WideStringCharReplacer(text, len, L"{i}", 3, L'\'');
+							WideStringCharReplacer(text, len, L"{/i}", 4, L'\'');
+							WideStringFilterBetween(text, len, L"{", 1, L"}", 1);
+						}
+						WideStringFilter(text, len, L"^", 2); // remove ^ followed by 1 char; NOTE(review): over-reads if the text ends in '^'
+						WideCharReplacer(text, len, L'\n', L' ');
+
+						if (prevText.length()==*len/sizeof(wchar_t) && prevText.find(text, 0, *len/sizeof(wchar_t))!=std::wstring::npos) // Check if the string is the same as the previous one
+							return false;
+						prevText.assign(text, *len/sizeof(wchar_t));
+
+						return true;
+					};
+					NewHook(hp, "Ren'py UCS2Format");
+					ok = true;
+				}
+				hp.address = (DWORD)GetProcAddress(module, "PyUnicodeUCS2_Replace");
+				if (hp.address)
 				{
-					*data = *(DWORD*)(*data + 0xc);
-					*count = wcslen((wchar_t*)*data) * sizeof(wchar_t);
-					*split = wcschr((wchar_t*)*data, L'%') == nullptr;
-				};
-				hp.type = USING_STRING | USING_UNICODE | NO_CONTEXT | DATA_INDIRECT/* | USING_SPLIT*/;
-				//hp.filter_fun = [](void* str, auto, auto, auto) { return *(wchar_t*)str != L'%'; };
-				NewHook(hp, "Ren'py");
-				return true;
+					hp.offset = 3 * 4; //arg 3;
+					hp.index = 0xC;
+					hp.length_offset = 0;
+					hp.split = 2 * 4; //arg 2;
+					hp.type = USING_STRING | USING_UNICODE | DATA_INDIRECT | USING_SPLIT;
+					hp.filter_fun = [](LPVOID data, DWORD *size, HookParam *, BYTE)
+					{
+						static std::wstring prevText; // last accepted string, used to reject repeats
+						auto text = reinterpret_cast<LPWSTR>(data);
+						auto len = reinterpret_cast<size_t *>(size); // *len is a byte count
+
+						if (cpp_wcsnstr(text, L"{fast}", *len/sizeof(wchar_t))) // reject strings carrying a {fast} tag
+							return false;
+						if (cpp_wcsnstr(text, L"{", *len/sizeof(wchar_t))) {
+							WideStringCharReplacer(text, len, L"{i}", 3, L'\'');
+							WideStringCharReplacer(text, len, L"{/i}", 4, L'\'');
+							WideStringFilterBetween(text, len, L"{", 1, L"}", 1);
+						}
+						WideCharReplacer(text, len, L'\n', L' ');
+
+						if (prevText.length()==*len/sizeof(wchar_t) && prevText.find(text, 0, *len/sizeof(wchar_t))!=std::wstring::npos) // Check if the string is the same as the previous one
+							return false;
+						prevText.assign(text, *len/sizeof(wchar_t));
+
+						return true;
+					};
+					NewHook(hp, "Ren'py UCS2Replace");
+					ok = true;
+				}
 			}
 		}
 	}
-	ConsoleOutput("Textractor: Ren'py failed: failed to find python2X.dll");
-	return false;
+	if ( !ok )
+		ConsoleOutput("Textractor: Ren'py failed: failed to find python2X.dll");
+	return ok;
 }
-
 void InsertMonoHook(HMODULE h)
 {
 	static HMODULE mono = h;
diff --git a/texthook/engine/match64.cc b/texthook/engine/match64.cc
index 859f746..d384f8f 100644
--- a/texthook/engine/match64.cc
+++ b/texthook/engine/match64.cc
@@ -6,9 +6,336 @@
 #include "mono/funcinfo.h"
 #include "engine.h"
 #include "util.h"
+#include "cpputil/cppcstring.h"
+
+// Warning: The offset in ITH has -4 offset comparing to pusha and AGTH
+enum pusha_off
+{
+  pusha_rax_off = -0xC,
+  pusha_rbx_off = -0x14,
+  pusha_rcx_off = -0x1c,
+  pusha_rdx_off = -0x24,
+  pusha_rsp_off = -0x2c,
+  pusha_rbp_off = -0x34,
+  pusha_rsi_off = -0x3c,
+  pusha_rdi_off = -0x44,
+  pusha_r8_off = -0x4c,
+  pusha_r9_off = -0x54,
+  pusha_r10_off = -0x5c,
+  pusha_r11_off = -0x64,
+  pusha_r12_off = -0x6c,
+  pusha_r13_off = -0x74,
+  pusha_r14_off = -0x7c,
+  pusha_r15_off = -0x84,
+  pusha_off = -0x8c // pushad offset
+};
+
+#define retof(rsp_base) *(uintptr_t *)(rsp_base) // return address
+#define regof(name, rsp_base) *(uintptr_t *)((rsp_base) + pusha_##name##_off - 4)
+#define argof(count, rsp_base) *(uintptr_t *)((rsp_base) + 4 * (count)) // starts from 1 instead of 0
+
+enum { VNR_TEXT_CAPACITY = 1500 }; // estimated max number of bytes allowed in VNR, slightly larger than VNR's text limit (1000)
+
+namespace { // unnamed helpers
+
+#define XX2 XX,XX // WORD
+#define XX4 XX2,XX2 // DWORD
+#define XX8 XX4,XX4 // QWORD
+
+// jichi 8/18/2013: Original maximum relative address in ITH
+//enum { MAX_REL_ADDR = 0x200000 };
+
+// jichi 10/1/2013: Increase relative address limit. Certain game engines like Artemis have a larger code region
+enum : DWORD { MAX_REL_ADDR = 0x00300000 };
+
+static union {
+  char text_buffer[0x1000];
+  wchar_t wc_buffer[0x800];
+
+  struct { // CodeSection
+    DWORD base;
+    DWORD size;
+  } code_section[0x200];
+};
+DWORD text_buffer_length; // number of valid bytes in text_buffer
+
+// 7/29/2014 jichi: I should move these functions to different files
+// String utilities
+// Walk backwards from s and return the address just after the first zero byte found (nullptr if *s is 0 or none is found within maxsize)
+LPCSTR reverse_search_begin(const char *s, int maxsize = VNR_TEXT_CAPACITY)
+{
+  if (*s)
+    for (int i = 0; i < maxsize; i++, s--)
+      if (!*s)
+        return s + 1;
+  return nullptr;
+}
+
+bool all_ascii(const char *s, int maxsize = VNR_TEXT_CAPACITY)
+{
+  if (s)
+    for (int i = 0; i < maxsize && *s; i++, s++)
+      if ((BYTE)*s > 127) // unsigned char
+        return false;
+  return true;
+}
+
+bool all_ascii(const wchar_t *s, int maxsize = VNR_TEXT_CAPACITY)
+{
+  if (s)
+    for (int i = 0; i < maxsize && *s; i++, s++)
+      if (*s > 127) // unsigned char
+        return false;
+  return true;
+}
+
+// String filters
+
+void CharReplacer(char *str, size_t *size, char fr, char to) // replaces every fr with to; *size is unchanged
+{
+  size_t len = *size;
+  for (size_t i = 0; i < len; i++)
+    if (str[i] == fr)
+      str[i] = to;
+}
+
+void WideCharReplacer(wchar_t *str, size_t *size, wchar_t fr, wchar_t to) // *size is in bytes and is unchanged
+{
+  size_t len = *size / 2;
+  for (size_t i = 0; i < len; i++)
+    if (str[i] == fr)
+      str[i] = to;
+}
+
+void CharFilter(char *str, size_t *size, char ch) // deletes every ch and shrinks *size accordingly
+{
+  size_t len = *size,
+         curlen;
+  for (char *cur = (char *)::memchr(str, ch, len);
+       (cur && --len && (curlen = len - (cur - str)));
+       cur = (char *)::memchr(cur, ch, curlen))
+    ::memmove(cur, cur + 1, curlen);
+  *size = len;
+}
+
+void WideCharFilter(wchar_t *str, size_t *size, wchar_t ch) // wide variant; *size is in bytes
+{
+  size_t len = *size / 2,
+         curlen;
+  for (wchar_t *cur = cpp_wcsnchr(str, ch, len);
+       (cur && --len && (curlen = len - (cur - str)));
+       cur = cpp_wcsnchr(cur, ch, curlen))
+    ::memmove(cur, cur + 1, 2 * curlen);
+  *size = len * 2;
+}
+
+void CharsFilter(char *str, size_t *size, const char *chars) // deletes every character that occurs in chars
+{
+  size_t len = *size,
+         curlen;
+  for (char *cur = cpp_strnpbrk(str, chars, len);
+       (cur && --len && (curlen = len - (cur - str)));
+       cur = cpp_strnpbrk(cur, chars, curlen))
+    ::memmove(cur, cur + 1, curlen);
+  *size = len;
+}
+
+void WideCharsFilter(wchar_t *str, size_t *size, const wchar_t *chars) // wide variant; *size is in bytes
+{
+  size_t len = *size / 2,
+         curlen;
+  for (wchar_t *cur = cpp_wcsnpbrk(str, chars, len);
+       (cur && --len && (curlen = len - (cur - str)));
+       cur = cpp_wcsnpbrk(cur, chars, curlen))
+    ::memmove(cur, cur + 1, 2 * curlen);
+  *size = len * 2;
+}
+
+void StringFilter(char *str, size_t *size, const char *remove, size_t removelen) // deletes removelen chars at every match of remove
+{
+  size_t len = *size,
+         curlen;
+  for (char *cur = cpp_strnstr(str, remove, len);
+       (cur && (len -= removelen) && (curlen = len - (cur - str)));
+       cur = cpp_strnstr(cur, remove, curlen))
+    ::memmove(cur, cur + removelen, curlen);
+  *size = len;
+}
+
+void WideStringFilter(wchar_t *str, size_t *size, const wchar_t *remove, size_t removelen) // wide variant; *size is in bytes
+{
+  size_t len = *size / 2,
+         curlen;
+  for (wchar_t *cur = cpp_wcsnstr(str, remove, len);
+       (cur && (len -= removelen) && (curlen = len - (cur - str)));
+       cur = cpp_wcsnstr(cur, remove, curlen))
+    ::memmove(cur, cur + removelen, 2 * curlen);
+  *size = len * 2;
+}
+
+void StringFilterBetween(char *str, size_t *size, const char *fr, size_t frlen, const char *to, size_t tolen) // deletes fr...to spans, delimiters included
+{
+  size_t len = *size,
+         curlen;
+  for (char *cur = cpp_strnstr(str, fr, len);
+       cur;
+       cur = cpp_strnstr(cur, fr, curlen)) {
+    curlen = (len - frlen) - (cur - str);
+    auto end = cpp_strnstr(cur + frlen, to, curlen);
+    if (!end)
+      break;
+    curlen = len - (end - str) - tolen;
+    ::memmove(cur, end + tolen, curlen);
+    len -= tolen + (end - cur);
+  }
+  *size = len;
+}
+
+void WideStringFilterBetween(wchar_t *str, size_t *size, const wchar_t *fr, size_t frlen, const wchar_t *to, size_t tolen) // wide variant; *size is in bytes
+{
+  size_t len = *size / 2,
+         curlen;
+  for (wchar_t *cur = cpp_wcsnstr(str, fr, len);
+       cur;
+       cur = cpp_wcsnstr(cur, fr, curlen)) {
+    curlen = (len - frlen) - (cur - str);
+    auto end = cpp_wcsnstr(cur + frlen, to, curlen);
+    if (!end)
+      break;
+    curlen = len - (end - str) - tolen;
+    ::memmove(cur, end + tolen, 2 * curlen);
+    len -= tolen + (end - cur);
+  }
+  *size = len * 2;
+}
+
+void StringCharReplacer(char *str, size_t *size, const char *src, size_t srclen, char ch) // replaces every match of src with the single char ch
+{
+  size_t len = *size,
+         curlen;
+  for (char *cur = cpp_strnstr(str, src, len);
+       cur && len;
+       cur = cpp_strnstr(cur, src, curlen)) {
+    *cur++ = ch;
+    len -= srclen - 1;
+    curlen = len - (cur - str);
+    if (curlen == 0)
+      break;
+    ::memmove(cur, cur + srclen - 1, curlen);
+  }
+  *size = len;
+}
+
+void WideStringCharReplacer(wchar_t *str, size_t *size, const wchar_t *src, size_t srclen, wchar_t ch) // wide variant; *size is in bytes
+{
+  size_t len = *size / 2,
+         curlen;
+  for (wchar_t *cur = cpp_wcsnstr(str, src, len);
+       cur && len;
+       cur = cpp_wcsnstr(cur, src, curlen)) {
+    *cur++ = ch;
+    len -= srclen - 1;
+    curlen = len - (cur - str);
+    if (curlen == 0)
+      break;
+    ::memmove(cur, cur + srclen -1, 2 * curlen);
+  }
+  *size = len * 2;
+}
+
+// NOTE: I assume srclen >= dstlen
+void StringReplacer(char *str, size_t *size, const char *src, size_t srclen, const char *dst, size_t dstlen)
+{
+  size_t len = *size,
+         curlen;
+  for (char *cur = cpp_strnstr(str, src, len);
+       cur && len;
+       cur = cpp_strnstr(cur, src, curlen)) {
+    ::memcpy(cur, dst, dstlen);
+    cur += dstlen;
+    len -= srclen - dstlen;
+    curlen = len - (cur - str);
+    if (curlen == 0)
+      break;
+    if (srclen > dstlen)
+      ::memmove(cur, cur + srclen - dstlen, curlen);
+  }
+  *size = len;
+}
+
+void WideStringReplacer(wchar_t *str, size_t *size, const wchar_t *src, size_t srclen, const wchar_t *dst, size_t dstlen) // wide variant; *size is in bytes
+{
+  size_t len = *size / 2,
+         curlen;
+  for (wchar_t *cur = cpp_wcsnstr(str, src, len);
+       cur && len;
+       cur = cpp_wcsnstr(cur, src, curlen)) {
+    ::memcpy(cur, dst, 2 * dstlen);
+    cur += dstlen;
+    len -= srclen - dstlen;
+    curlen = len - (cur - str);
+    if (curlen == 0)
+      break;
+    if (srclen > dstlen)
+      ::memmove(cur, cur + srclen - dstlen, 2 * curlen);
+  }
+  *size = len * 2;
+}
+
+bool NewLineCharFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  CharFilter(reinterpret_cast<LPSTR>(data), reinterpret_cast<size_t *>(size),
+      '\n');
+  return true;
+}
+bool NewLineWideCharFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  WideCharFilter(reinterpret_cast<LPWSTR>(data), reinterpret_cast<size_t *>(size),
+      L'\n');
+  return true;
+}
+bool NewLineStringFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  StringFilter(reinterpret_cast<LPSTR>(data), reinterpret_cast<size_t *>(size),
+      "\\n", 2);
+  return true;
+}
+bool NewLineWideStringFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  WideStringFilter(reinterpret_cast<LPWSTR>(data), reinterpret_cast<size_t *>(size),
+      L"\\n", 2);
+  return true;
+}
+bool NewLineCharToSpaceFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  CharReplacer(reinterpret_cast<LPSTR>(data), reinterpret_cast<size_t *>(size), '\n', ' ');
+  return true;
+}
+bool NewLineWideCharToSpaceFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  WideCharReplacer(reinterpret_cast<LPWSTR>(data), reinterpret_cast<size_t *>(size), L'\n', L' ');
+  return true;
+}
+
+// Remove every character <= 0x1f (i.e. before space ' ') except 0xa and 0xd.
+bool IllegalCharsFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  CharsFilter(reinterpret_cast<LPSTR>(data), reinterpret_cast<size_t *>(size),
+      "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
+  return true;
+}
+bool IllegalWideCharsFilter(LPVOID data, DWORD *size, HookParam *, BYTE)
+{
+  WideCharsFilter(reinterpret_cast<LPWSTR>(data), reinterpret_cast<size_t *>(size),
+      L"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f");
+  return true;
+}
+
+} // unnamed namespace
 
 namespace Engine
 {
+	enum : DWORD { X64_MAX_REL_ADDR = 0x00300000 };
 	/** Artikash 6/7/2019
 *   PPSSPP JIT code has pointers, but they are all added to an offset before being used.
 	Find that offset so that hook searching works properly.
@@ -193,18 +520,40 @@ namespace Engine
 				{
 					wcscpy_s(spDefault.exportModule, name);
 					HookParam hp = {};
-					hp.address = (DWORD)GetProcAddress(module, "PyUnicodeUCS2_Format");
+					hp.address = (uintptr_t)GetProcAddress(module, "PyUnicodeUCS2_Format");
 					if (!hp.address)
 					{
 						ConsoleOutput("Textractor: Ren'py failed: failed to find PyUnicodeUCS2_Format");
 						return false;
 					}
-					hp.offset = -0x20; // rcx
+					hp.offset = pusha_rcx_off -4; // rcx
 					hp.index = 0x18;
 					hp.length_offset = 0;
-					//hp.split = pusha_ebx_off - 4;
+					//hp.split = pusha_rsp_off -4;
 					hp.type = USING_STRING | USING_UNICODE | NO_CONTEXT | DATA_INDIRECT /* | USING_SPLIT*/;
-					//hp.filter_fun = [](void* str, auto, auto, auto) { return *(wchar_t*)str != L'%'; };
+					hp.filter_fun = [](LPVOID data, DWORD *size, HookParam *, BYTE)
+					{
+						static std::wstring prevText; // last accepted string, used to reject repeats
+						auto text = reinterpret_cast<LPWSTR>(data);
+						auto len = reinterpret_cast<size_t *>(size); // NOTE(review): size_t is wider than DWORD on x64 - confirm the caller's storage
+
+						if (cpp_wcsnstr(text, L"%", *len/sizeof(wchar_t))) // reject strings that still contain '%'
+							return false;
+
+						if (cpp_wcsnstr(text, L"{", *len/sizeof(wchar_t))) {
+							WideStringCharReplacer(text, len, L"{i}", 3, L'\'');
+							WideStringCharReplacer(text, len, L"{/i}", 4, L'\'');
+							WideStringFilterBetween(text, len, L"{", 1, L"}", 1);
+						}
+						WideStringFilter(text, len, L"^", 2); // remove ^ followed by 1 char; NOTE(review): over-reads if the text ends in '^'
+						WideCharReplacer(text, len, L'\n', L' ');
+
+						if (prevText.length()==*len/sizeof(wchar_t) && prevText.find(text, 0, *len/sizeof(wchar_t))!=std::wstring::npos) // Check if the string is the same as the previous one
+							return false;
+						prevText.assign(text, *len/sizeof(wchar_t));
+
+						return true;
+					};
 					NewHook(hp, "Ren'py");
 					return true;
 				}
@@ -213,7 +562,126 @@ namespace Engine
 		ConsoleOutput("Textractor: Ren'py failed: failed to find python2X.dll");
 		return false;
 	}
+	bool InsertRenpy3Hook()
+	{
+		//by Blu3train
+		/*
+		* Sample games:
+		* https://vndb.org/v45820
+		* https://vndb.org/v26318
+		* https://vndb.org/v39954
+		* https://vndb.org/r110220
+		* https://vndb.org/r114981
+		* https://vndb.org/v33647
+		* https://vndb.org/r73160
+		* https://vndb.org/v44518
+		* https://vndb.org/v31994
+		* https://vndb.org/r115756
+		*/
+		wchar_t python[] = L"python3X.dll", libpython[] = L"libpython3.X.dll";
+		for (wchar_t* name : { python, libpython })
+		{
+			wchar_t* pos = wcschr(name, L'X');
+			for (int pythonMinorVersion = 0; pythonMinorVersion <= 9; ++pythonMinorVersion)
+			{
+				*pos = L'0' + pythonMinorVersion;
+				if (HMODULE module = GetModuleHandleW(name))
+				{
+					wcscpy_s(spDefault.exportModule, name);
+					HookParam hp = {};
+					hp.address = (uintptr_t)GetProcAddress(module, "PyUnicode_Format");
+					if (!hp.address)
+					{
+						ConsoleOutput("Textractor: Ren'py 3 failed: failed to find PyUnicode_Format");
+						return false;
+					}
+					hp.offset = pusha_rcx_off -4; // rcx
+					hp.padding = 0x48;
+					hp.length_offset = 0;
+					hp.text_fun = [](DWORD rsp_base, HookParam *pHp, BYTE, DWORD* data, DWORD* split, DWORD* count)
+					{
+						uint64_t r10 = regof(r10, rsp_base);
+						uint64_t r11 = regof(r11, rsp_base);
+						if (r10==0x03FF || r11==0x03FF) {
+							uint64_t rcx = regof(rcx, rsp_base);
+							BYTE unicode = !(*(BYTE*)(rcx + 0x20) & 0x40); // [rcx+0x20) bit 0x40 == 0
+							if (unicode) {
+								*data += 0x48; //padding
+								*count = wcslen((wchar_t*)*data) * sizeof(wchar_t);
+								return;
+							}
+						}
+						*count = 0; // every other call yields no text
+					};
+					hp.type = USING_STRING | USING_UNICODE | NO_CONTEXT;
+					hp.filter_fun = [](LPVOID data, DWORD *size, HookParam *, BYTE)
+					{
+						auto text = reinterpret_cast<LPWSTR>(data);
+						auto len = reinterpret_cast<size_t *>(size); // NOTE(review): size_t is wider than DWORD on x64 - confirm the caller's storage
+						if (cpp_wcsnstr(text, L"%", *len/sizeof(wchar_t))) // reject strings that still contain '%'
+							return false;
+						if (cpp_wcsnstr(text, L"{", *len/sizeof(wchar_t))) {
+							WideStringCharReplacer(text, len, L"{i}", 3, L'\'');
+							WideStringCharReplacer(text, len, L"{/i}", 4, L'\'');
+							WideStringFilterBetween(text, len, L"{", 1, L"}", 1);
+						}
+
+						//CP_OEMCP -The current system OEM code page
+						const int oemBytes = WideCharToMultiByte(CP_OEMCP, 0, text, (int)(*len/sizeof(wchar_t)), text_buffer, 0x1000, NULL, NULL); // convert only the filtered *len bytes, not up to a NUL
+						text_buffer_length = oemBytes > 0 ? oemBytes : 0; // byte count of the OEM copy, saved for the non-unicode hook
+
+						return true;
+					};
+					NewHook(hp, "Ren'py 3 unicode");
+
+					hp.address += 6;
+					hp.padding = 0x30;
+					hp.text_fun = [](DWORD rsp_base, HookParam *pHp, BYTE, DWORD* data, DWORD* split, DWORD* count)
+					{
+						uint64_t r10 = regof(r10, rsp_base);
+						uint64_t r11 = regof(r11, rsp_base);
+						if (r10==0x03FF || r11==0x03FF) {
+							uint64_t rcx = regof(rcx, rsp_base);
+							BYTE unicode = !(*(BYTE*)(rcx + 0x20) & 0x40); // [rcx+0x20) bit 0x40 == 0
+
+							*data += unicode ? 0x48 : 0x30; //padding
+							*count = ::strlen((char*)*data);
+							if (!cpp_strnstr((char*)*data, "%", *count)) // not garbage
+								return;
+						}
+						*count = 0; // every other call yields no text
+					};
+					hp.type = USING_STRING | NO_CONTEXT;
+					hp.filter_fun = [](LPVOID data, DWORD *size, HookParam *, BYTE)
+					{
+						auto text = reinterpret_cast<LPSTR>(data);
+						auto len = reinterpret_cast<size_t *>(size); // NOTE(review): size_t is wider than DWORD on x64 - confirm the caller's storage
+
+						if (text[0]!=0 && text[1]==0) {
+							// text from unicode hook
+							*len = text_buffer_length;
+							::memmove(text, text_buffer, *len);
+						}
+						if (cpp_strnstr(text, "%", *len)) // reject strings that still contain '%'
+							return false;
+						if (cpp_strnstr(text, "{", *len)) {
+							StringCharReplacer(text, len, "{i}", 3, '\'');
+							StringCharReplacer(text, len, "{/i}", 4, '\'');
+							StringFilterBetween(text, len, "{", 1, "}", 1);
+						}
+
+						return true;
+					};
+					NewHook(hp, "Ren'py 3");
+
+					return true;
+				}
+			}
+		}
+		ConsoleOutput("Textractor: Ren'py 3 failed: failed to find python3X.dll");
+		return false;
+	}
 
 	bool UnsafeDetermineEngineType()
 	{
 		if (Util::CheckFile(L"PPSSPP*.exe") && FindPPSSPP()) return true;
@@ -228,7 +696,7 @@ namespace Engine
 			return true;
 		}
 
-		if (Util::CheckFile(L"*.py") && InsertRenpyHook()) return true;
+		if ((Util::CheckFile(L"*.py") && InsertRenpyHook()) || InsertRenpy3Hook()) return true; // the Ren'py 3 probe runs even when no *.py file is found
 
 		for (const wchar_t* monoName : { L"mono.dll", L"mono-2.0-bdwgc.dll" }) if (HMODULE module = GetModuleHandleW(monoName)) if (InsertMonoHooks(module)) return true;
 