This commit is contained in:
恍兮惚兮 2024-11-04 09:46:14 +08:00
parent b395097057
commit bd51193464
4 changed files with 53 additions and 2 deletions

View File

@ -347,7 +347,7 @@ void TextHook::Send(uintptr_t lpDataBase)
} }
else else
{ {
if (hp.type & CODEC_UTF32) if (hp.type & CODEC_UTF32 || hp.type& CODEC_UTF8)
{ {
*(uint32_t *)pbData = lpDataIn & 0xffffffff; *(uint32_t *)pbData = lpDataIn & 0xffffffff;
} }
@ -595,6 +595,8 @@ int TextHook::GetLength(hook_stack *stack, uintptr_t in)
len = 2; len = 2;
else if (hp.type & CODEC_UTF32) else if (hp.type & CODEC_UTF32)
len = 4; len = 4;
else if (hp.type & CODEC_UTF8)
len = utf8charlen((char*)&in);
else else
{ // CODEC_ANSI_BE,CHAR_LITTLE_ENDIAN { // CODEC_ANSI_BE,CHAR_LITTLE_ENDIAN
if (hp.type & CODEC_ANSI_BE) if (hp.type & CODEC_ANSI_BE)

View File

@ -83,6 +83,8 @@ namespace
break; break;
case L'W': case L'W':
hp.type |= CODEC_UTF16; hp.type |= CODEC_UTF16;
case L'C':
hp.type |= CODEC_UTF8;
break; break;
case L'I': case L'I':
hp.type |= CODEC_UTF32; hp.type |= CODEC_UTF32;
@ -94,7 +96,7 @@ namespace
hp.type |= USING_STRING | CODEC_UTF16; hp.type |= USING_STRING | CODEC_UTF16;
break; break;
case L'M': case L'M':
hp.type |= SPECIAL_JIT_STRING | USING_STRING | CODEC_UTF16; hp.type |= USING_STRING | CODEC_UTF16 | SPECIAL_JIT_STRING;
break; break;
case L'U': case L'U':
hp.type |= USING_STRING | CODEC_UTF32; hp.type |= USING_STRING | CODEC_UTF32;
@ -333,6 +335,8 @@ namespace
{ {
if (hp.type & CODEC_UTF16) if (hp.type & CODEC_UTF16)
HCode += L'W'; HCode += L'W';
else if (hp.type & CODEC_UTF8)
HCode += L'C';
else if (hp.type & CODEC_UTF32) else if (hp.type & CODEC_UTF32)
HCode += L'I'; HCode += L'I';
else if (hp.type & CODEC_ANSI_BE) else if (hp.type & CODEC_ANSI_BE)

View File

@ -214,7 +214,51 @@ size_t u32strlen(uint32_t *data)
s++; s++;
return s; return s;
} }
// Tells whether `byte` is a UTF-8 continuation (trailing) byte, i.e. whether
// its two most significant bits are `10` (bit pattern 10xxxxxx).
// Returns 1 when it is, 0 otherwise.
int is_valid_following_byte(unsigned char byte)
{
    // Shifting out the six payload bits leaves only the two marker bits.
    const int marker_bits = byte >> 6;
    return marker_bits == 0x2;
}
// Returns the length in bytes (1-4) of the UTF-8 encoded character that
// starts at `str`.
//
// Returns 0 when `str` is null, when it points at a NUL byte, or when the
// bytes at `str` do not begin with a well-formed UTF-8 sequence.
//
// Validation follows the well-formed byte-sequence table of RFC 3629 / the
// Unicode Standard. Besides requiring 10xxxxxx continuation bytes, it rejects:
//   - overlong encodings (lead bytes 0xC0/0xC1, E0 80..9F, F0 80..8F),
//   - UTF-16 surrogate code points (ED A0..BF),
//   - code points above U+10FFFF (F4 90.. and lead bytes 0xF5..0xF7).
//
// Bytes are inspected strictly left to right and inspection stops at the
// first byte that does not fit, so a NUL-terminated buffer is never read past
// its terminator.
int utf8charlen(char *str)
{
    if (!str)
        return 0;

    const unsigned char *bytes = reinterpret_cast<const unsigned char *>(str);
    const unsigned char lead = bytes[0];

    if (lead == 0)
        return 0;
    if (lead < 0x80)
        return 1; // 0xxxxxxx - single-byte (ASCII) character

    const auto within = [](unsigned char b, unsigned char lo, unsigned char hi)
    { return lo <= b && b <= hi; };
    const auto is_tail = [](unsigned char b)
    { return (b & 0xC0) == 0x80; }; // 10xxxxxx

    // 110xxxxx - two bytes. 0xC0 and 0xC1 could only encode overlong forms of
    // ASCII, so the valid lead range starts at 0xC2.
    if (within(lead, 0xC2, 0xDF))
        return is_tail(bytes[1]) ? 2 : 0;

    // 1110xxxx - three bytes. The second byte's range is narrowed for two
    // lead bytes: E0 must not be overlong, ED must not encode a surrogate.
    if (within(lead, 0xE0, 0xEF))
    {
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;
        if (lead == 0xE0)
            lo = 0xA0;
        else if (lead == 0xED)
            hi = 0x9F;
        return (within(bytes[1], lo, hi) && is_tail(bytes[2])) ? 3 : 0;
    }

    // 11110xxx - four bytes. F0 must not be overlong and F4 must not exceed
    // U+10FFFF; lead bytes 0xF5..0xF7 are always out of range.
    if (within(lead, 0xF0, 0xF4))
    {
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;
        if (lead == 0xF0)
            lo = 0x90;
        else if (lead == 0xF4)
            hi = 0x8F;
        return (within(bytes[1], lo, hi) && is_tail(bytes[2]) && is_tail(bytes[3])) ? 4 : 0;
    }

    // Stray continuation byte (0x80..0xBF) or a byte that can never start a
    // UTF-8 sequence (0xC0, 0xC1, 0xF5..0xFF).
    return 0;
}
std::string wcasta(const std::wstring &x) std::string wcasta(const std::wstring &x)
{ {
std::string xx; std::string xx;

View File

@ -36,6 +36,7 @@ std::optional<std::wstring> StringToWideString(const std::string &text, UINT enc
std::string wcasta(const std::wstring& x); std::string wcasta(const std::wstring& x);
std::wstring acastw(const std::string& x); std::wstring acastw(const std::string& x);
size_t u32strlen(uint32_t *data); size_t u32strlen(uint32_t *data);
int utf8charlen(char *data);
inline bool disable_mbwc = false; inline bool disable_mbwc = false;
inline bool disable_wcmb = false; inline bool disable_wcmb = false;
template <class ST> template <class ST>