mirror of
https://github.com/HIllya51/LunaHook.git
synced 2024-11-23 22:05:36 +08:00
263 lines
8.3 KiB
C++
263 lines
8.3 KiB
C++
// qtdynsjis.cc
|
|
// 6/3/2015 jichi
|
|
// http://en.wikipedia.org/wiki/Shift_JIS
|
|
#include "dynsjiscodec.h"
|
|
#ifdef __clang__
|
|
# pragma GCC diagnostic ignored "-Wlogical-op-parentheses"
|
|
#endif // __clang__
|
|
|
|
//#ifdef _MSC_VER
|
|
//# pragma warning(disable:4018) // C4018: signed/unsigned mismatch
|
|
//#endif // _MSC_VER
|
|
|
|
//#define SK_NO_QT
|
|
//#define DEBUG "dynsjis.cc"
|
|
//#include "sakurakit/skdebug.h"
|
|
|
|
/** Private class */
|
|
|
|
// See also LeadByte table for Windows:
|
|
//
|
|
// BYTE LeadByteTable[0x100] = {
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
// 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
// 2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
// 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
// 2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1
|
|
// };
|
|
//
|
|
// -2: 0x00 and 0xff are skipped
|
|
|
|
class DynamicShiftJISCodecPrivate
|
|
{
|
|
public:
|
|
UINT codepage;
|
|
std::wstring text; // already saved characters
|
|
|
|
UINT minimumSecondByte;
|
|
|
|
explicit DynamicShiftJISCodecPrivate(UINT codepage_)
|
|
: codepage(932)
|
|
, minimumSecondByte(0)
|
|
{
|
|
codepage = codepage_;
|
|
}
|
|
|
|
size_t capacity() const
|
|
{
|
|
// See: http://en.wikipedia.org/wiki/Shift_JIS
|
|
return // = 7739
|
|
(3 * 16 - 1) * (4 * 16 + 4 - 1 - minimumSecondByte) // = 3149, 0x00 are skipped
|
|
+ (16 + 2) * (256 - 1 - minimumSecondByte) // = 4590, first/last byte unused
|
|
;
|
|
}
|
|
bool isFull() const { return text.size() >= capacity(); }
|
|
std::string encodeSTD(const wchar_t* text, size_t length, bool* dynamic);
|
|
|
|
std::string encode(const wchar_t *text, size_t length, bool *dynamic);
|
|
std::wstring decode (const char* data, size_t length, bool* dynamic) const;
|
|
|
|
private:
|
|
std::string encodeCharSTD(wchar_t ch);
|
|
wchar_t decodeChar(UINT8 ch1, UINT8 ch2) const;
|
|
};
|
|
|
|
// Encode
|
|
std::string DynamicShiftJISCodecPrivate::encodeSTD(const wchar_t* text, size_t length, bool* dynamic)
|
|
{
|
|
std::string ret;
|
|
for (size_t i = 0; i < length; i++) {
|
|
wchar_t ch = text[i];
|
|
if (ch <= 127)
|
|
ret.push_back(ch);
|
|
else {
|
|
std::wstring ws;
|
|
ws.push_back(ch);
|
|
std::string data = WideStringToString(ws, codepage);
|
|
if (StringToWideString(WideStringToString(ws, codepage),codepage)!=ws) { // failed to decode
|
|
data = encodeCharSTD(ch);
|
|
if (!data.empty() && dynamic)
|
|
*dynamic = true;
|
|
}
|
|
ret.append(data);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
std::string DynamicShiftJISCodecPrivate::encodeCharSTD(wchar_t ch)
|
|
{
|
|
std::string ret;
|
|
size_t i = text.find(ch);
|
|
if (i == std::wstring::npos) {
|
|
if (isFull())
|
|
return ret;
|
|
i = text.size();
|
|
text.push_back(ch);
|
|
}
|
|
if (i < 31 * (4 * 16 + 4 - 1 - minimumSecondByte)) {
|
|
int v1 = i / (4 * 16 + 4 - 1 - minimumSecondByte) + 0x81,
|
|
v2 = i % (4 * 16 + 4 - 1 - minimumSecondByte) + 1 + minimumSecondByte;
|
|
if (v2 == 0x40)
|
|
v2 = 0x7f;
|
|
else if (v2 >= 0x41)
|
|
v2 += 0xfd - 0x41;
|
|
ret.push_back(v1);
|
|
ret.push_back(v2);
|
|
return ret;
|
|
}
|
|
i -= 31 * (4 * 16 + 4 - 1 - minimumSecondByte);
|
|
if (i < 16 * (4 * 16 + 4 - 1 - minimumSecondByte)) {
|
|
int v1 = i / (4 * 16 + 4 - 1 - minimumSecondByte) + 0xe0,
|
|
v2 = i % (4 * 16 + 4 - 1 - minimumSecondByte) + 1 + minimumSecondByte;
|
|
if (v2 == 0x40)
|
|
v2 = 0x7f;
|
|
else if (v2 >= 0x41)
|
|
v2 += 0xfd - 0x41;
|
|
ret.push_back(v1);
|
|
ret.push_back(v2);
|
|
return ret;
|
|
}
|
|
i -= 16 * (4 * 16 + 4 - 1 - minimumSecondByte);
|
|
if (i < 256 - 1 - minimumSecondByte) {
|
|
int v1 = 0x80,
|
|
v2 = i % (256 - 1 - minimumSecondByte) + 1 + minimumSecondByte;
|
|
ret.push_back(v1);
|
|
ret.push_back(v2);
|
|
return ret;
|
|
}
|
|
i -= 256 - 1 - minimumSecondByte;
|
|
if (i < 256 - 1 - minimumSecondByte) {
|
|
int v1 = 0xa0,
|
|
v2 = i % (256 - 1 - minimumSecondByte) + 1 + minimumSecondByte;
|
|
ret.push_back(v1);
|
|
ret.push_back(v2);
|
|
return ret;
|
|
}
|
|
i -= 256 - 1 - minimumSecondByte;
|
|
if (i < 16 * (256 - 1 - minimumSecondByte)) {
|
|
int v1 = i / (256 - 1 - minimumSecondByte) + 0xf0,
|
|
v2 = i % (256 - 1 - minimumSecondByte) + 1 + minimumSecondByte;
|
|
ret.push_back(v1);
|
|
ret.push_back(v2);
|
|
return ret;
|
|
}
|
|
// This return should be unreachable
|
|
return ret;
|
|
}
|
|
// Decode
|
|
|
|
std::wstring DynamicShiftJISCodecPrivate::decode(const char* data, size_t length, bool* dynamic) const
|
|
{
|
|
std::wstring ret;
|
|
for (size_t i = 0; i < length; i++) {
|
|
UINT8 ch = (UINT8)data[i];
|
|
if (ch <= 127)
|
|
ret.push_back(ch);
|
|
else if (ch >= 0xa1 && ch <= 0xdf) // size == 1
|
|
ret.append(StringToWideString(std::string(data + 1, 1), codepage).value());
|
|
else {
|
|
if (i + 1 == length) // no enough character
|
|
return ret;
|
|
UINT8 ch2 = (UINT8)data[++i];
|
|
if ((ch >= 0x81 && ch <= 0x9f || ch >= 0xe0 && ch <= 0xef)
|
|
&& (ch2 != 0x7f && ch2 >= 0x40 && ch2 <= 0xfc))
|
|
ret.append(StringToWideString(std::string(data + i - 1, 2), codepage).value());
|
|
else if (wchar_t c = decodeChar(ch, ch2)) {
|
|
ret.push_back(c);
|
|
if (dynamic)
|
|
*dynamic = true;
|
|
}
|
|
else
|
|
ret.push_back(ch + (wchar_t(ch2) << 8)); // preserve the original character
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
/**
 * Resolve a dynamic two-byte code to the character stored in `text`.
 *
 * This is the inverse of encodeCharSTD(): the lead byte selects a row, the
 * second byte a column, and the resulting slot indexes `text`.
 *
 * @param ch1  lead byte
 * @param ch2  second byte
 * @return the stored character, or 0 when the pair is not a dynamic code
 *         that encodeCharSTD() can produce or its slot is not in use
 */
wchar_t DynamicShiftJISCodecPrivate::decodeChar(UINT8 ch1, UINT8 ch2) const
{
  if (text.empty())
    return 0;

  const UINT narrowCols = 4 * 16 + 4 - 1 - minimumSecondByte;
  const UINT wideCols = 256 - 1 - minimumSecondByte;
  const UINT narrowSlots = 47 * narrowCols; // 31 rows at 0x81.. plus 16 rows at 0xe0..

  const bool lowNarrowRow = ch1 >= 0x81 && ch1 <= 0x9f;
  const bool highNarrowRow = ch1 >= 0xe0 && ch1 <= 0xef;

  UINT column = ch2;
  if (lowNarrowRow || highNarrowRow) {
    // Undo the remapping applied by encodeCharSTD(): 0x7f <- 0x40, 0xfd.. <- 0x41..
    if (ch2 == 0x7f)
      column = 0x40;
    else if (ch2 >= 0xfd)
      column = ch2 - 0xfd + 0x41;
    else if (ch2 >= 0x40)
      return 0; // regular Shift-JIS trail byte, never emitted as a dynamic code
  }

  // encodeCharSTD() only emits columns strictly above minimumSecondByte.
  // Rejecting the boundary value (0x00 when the minimum is 0) keeps the
  // index below from wrapping into the last slot of the previous row.
  if (column <= minimumSecondByte)
    return 0;
  const UINT offset = column - 1 - minimumSecondByte;

  size_t i = std::wstring::npos;
  if (lowNarrowRow)
    i = (ch1 - 0x81) * narrowCols + offset;
  else if (highNarrowRow)
    i = (ch1 - 0xe0 + 31) * narrowCols + offset;
  else if (ch1 == 0x80)
    i = narrowSlots + offset;
  else if (ch1 == 0xa0)
    i = narrowSlots + wideCols + offset;
  else if (ch1 >= 0xf0) // 0xf0-0xff, matching the 16 rows used by encodeCharSTD()
    i = narrowSlots + (ch1 - 0xf0 + 2) * wideCols + offset;

  // npos is never smaller than text.size(), so one comparison covers both cases.
  if (i < text.size())
    return text[i];
  return 0;
}
|
|
|
|
/** Public class */
|
|
|
|
// Allocates the private implementation object, forwarding `codec` to its
// constructor. The object is released in the destructor.
DynamicShiftJISCodec::DynamicShiftJISCodec(UINT codec)
  : d_(new D(codec))
{
}
|
|
|
|
// Releases the private implementation object allocated by the constructor.
DynamicShiftJISCodec::~DynamicShiftJISCodec()
{
  delete d_;
}
|
|
|
|
// Maximum number of characters the dynamic table can hold.
int DynamicShiftJISCodec::capacity() const
{
  return static_cast<int>(d_->capacity());
}
|
|
|
|
// Number of characters currently stored in the dynamic table.
int DynamicShiftJISCodec::size() const
{
  return static_cast<int>(d_->text.size());
}
|
|
|
|
// True while no character has been stored in the dynamic table.
bool DynamicShiftJISCodec::isEmpty() const
{
  return d_->text.empty();
}
|
|
|
|
// True once the dynamic table has reached its capacity.
bool DynamicShiftJISCodec::isFull() const
{
  return d_->isFull();
}
|
|
|
|
// Discards every character stored in the dynamic table.
void DynamicShiftJISCodec::clear()
{
  d_->text.clear();
}
|
|
|
|
// Current lower bound for the second byte of dynamic codes.
int DynamicShiftJISCodec::minimumSecondByte() const
{
  return static_cast<int>(d_->minimumSecondByte);
}
|
|
|
|
void DynamicShiftJISCodec::setMinimumSecondByte(int v) { d_->minimumSecondByte = v; }
|
|
|
|
std::string DynamicShiftJISCodec::encodeSTD(const std::wstring& text, bool* dynamic) const
|
|
{
|
|
if (dynamic)
|
|
*dynamic = false;
|
|
if (!d_->codepage)
|
|
return WideStringToString(text,GetACP());
|
|
return d_->encodeSTD(reinterpret_cast<const wchar_t*>(text.c_str()), text.size(), dynamic);
|
|
}
|
|
std::wstring DynamicShiftJISCodec::decode(const std::string&data, bool *dynamic) const
|
|
{
|
|
if (dynamic)
|
|
*dynamic = false;
|
|
if (!d_->codepage)
|
|
return (StringToWideString(data , CP_ACP).value() );
|
|
if (d_->text.empty())
|
|
return (StringToWideString(data , d_->codepage).value() );
|
|
return d_->decode(data.c_str(), data.size(), dynamic);
|
|
}
|
|
|
|
// EOF
|