From bd51193464fa99585459d7f881bfd86dfa01e9af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=81=8D=E5=85=AE=E6=83=9A=E5=85=AE?= <1173718158@qq.com>
Date: Mon, 4 Nov 2024 09:46:14 +0800
Subject: [PATCH] UTF8

---
 LunaHook/texthook.cc    |  4 +++-
 include/hookcode.cpp    |  6 +++++-
 include/stringutils.cpp | 44 +++++++++++++++++++++++++++++++++++++++++
 include/stringutils.h   |  1 +
 4 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/LunaHook/texthook.cc b/LunaHook/texthook.cc
index 5f28856..fce298a 100644
--- a/LunaHook/texthook.cc
+++ b/LunaHook/texthook.cc
@@ -347,7 +347,7 @@ void TextHook::Send(uintptr_t lpDataBase)
 			}
 			else
 			{
-				if (hp.type & CODEC_UTF32)
+				if (hp.type & CODEC_UTF32 || hp.type& CODEC_UTF8)
 				{
 					*(uint32_t *)pbData = lpDataIn & 0xffffffff;
 				}
@@ -595,6 +595,8 @@ int TextHook::GetLength(hook_stack *stack, uintptr_t in)
 			len = 2;
 		else if (hp.type & CODEC_UTF32)
 			len = 4;
+		else if (hp.type & CODEC_UTF8)
+			len = utf8charlen((char*)&in);
 		else
 		{ // CODEC_ANSI_BE,CHAR_LITTLE_ENDIAN
 			if (hp.type & CODEC_ANSI_BE)
diff --git a/include/hookcode.cpp b/include/hookcode.cpp
index a561930..a95cb45 100644
--- a/include/hookcode.cpp
+++ b/include/hookcode.cpp
@@ -83,6 +83,9 @@ namespace
 			break;
 		case L'W':
 			hp.type |= CODEC_UTF16;
+			break; // 'W' must stop here instead of falling through into the UTF-8 case
+		case L'C':
+			hp.type |= CODEC_UTF8;
 			break;
 		case L'I':
 			hp.type |= CODEC_UTF32;
@@ -94,7 +96,7 @@ namespace
 			hp.type |= USING_STRING | CODEC_UTF16;
 			break;
 		case L'M':
-			hp.type |= SPECIAL_JIT_STRING | USING_STRING | CODEC_UTF16;
+			hp.type |= USING_STRING | CODEC_UTF16 | SPECIAL_JIT_STRING;
 			break;
 		case L'U':
 			hp.type |= USING_STRING | CODEC_UTF32;
@@ -333,6 +335,8 @@ namespace
 		{
 			if (hp.type & CODEC_UTF16)
 				HCode += L'W';
+			else if (hp.type & CODEC_UTF8)
+				HCode += L'C';
 			else if (hp.type & CODEC_UTF32)
 				HCode += L'I';
 			else if (hp.type & CODEC_ANSI_BE)
diff --git a/include/stringutils.cpp b/include/stringutils.cpp
index 4330fa8..6d53264 100644
--- a/include/stringutils.cpp
+++ b/include/stringutils.cpp
@@ -214,7 +214,51 @@ size_t u32strlen(uint32_t *data)
     s++;
   return s;
 }
+// Reports whether byte is a UTF-8 continuation byte (bit pattern 10xxxxxx).
+int is_valid_following_byte(unsigned char byte)
+{
+  return (byte >> 6) == 0x02; // top two bits are exactly "10"
+}
+// Byte length (1-4) of the UTF-8 character that starts at str.
+// Yields 0 when str is null, points at a NUL byte, or does not begin with a
+// lead byte followed by the continuation bytes that lead byte announces.
+int utf8charlen(char *str)
+{
+  if (str == nullptr || *str == '\0')
+    return 0;
+  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(str);
+  const unsigned char lead = bytes[0];

+  // Sequence length announced by the high bits of the lead byte.
+  int expected;
+  if ((lead >> 7) == 0x00)
+  {
+    expected = 1; // 0xxxxxxx
+  }
+  else if ((lead >> 5) == 0x06)
+  {
+    expected = 2; // 110xxxxx
+  }
+  else if ((lead >> 4) == 0x0E)
+  {
+    expected = 3; // 1110xxxx
+  }
+  else if ((lead >> 3) == 0x1E)
+  {
+    expected = 4; // 11110xxx
+  }
+  else
+  {
+    return 0; // 10xxxxxx or 11111xxx cannot start a character
+  }
+
+  // Check the trailing bytes in order and stop at the first mismatch, so
+  // nothing past a non-continuation byte (such as a terminating NUL) is read.
+  for (int i = 1; i < expected; ++i)
+    if (!is_valid_following_byte(bytes[i]))
+      return 0;
+  return expected;
+}
 std::string wcasta(const std::wstring &x)
 {
   std::string xx;
diff --git a/include/stringutils.h b/include/stringutils.h
index e8bd909..b0128f4 100644
--- a/include/stringutils.h
+++ b/include/stringutils.h
@@ -36,6 +36,7 @@ std::optional<std::wstring> StringToWideString(const std::string &text, UINT enc
 std::string wcasta(const std::wstring& x);
 std::wstring acastw(const std::string& x);
 size_t u32strlen(uint32_t *data);
+int utf8charlen(char *data); // byte length (1-4) of the UTF-8 character at data; 0 if null, empty or malformed
 inline bool disable_mbwc = false;
 inline bool disable_wcmb = false;
 template <class ST>