mirror of
https://github.com/Artikash/Textractor.git
synced 2025-01-11 01:59:14 +08:00
revert separation line
This commit is contained in:
parent
457aed96c2
commit
95b145bece
@ -197,7 +197,7 @@ public:
|
|||||||
void AddSentence(QString sentence)
|
void AddSentence(QString sentence)
|
||||||
{
|
{
|
||||||
if (sentence.size() > maxSentenceSize) sentence = SENTENCE_TOO_BIG;
|
if (sentence.size() > maxSentenceSize) sentence = SENTENCE_TOO_BIG;
|
||||||
if (!showOriginal) sentence = sentence.section("\n----\n", sentence.count("\n----\n") / 2 + 1);
|
if (!showOriginal && sentence.count(u8"\x200b \n")) sentence = sentence.split(u8"\x200b \n")[1];
|
||||||
sanitize(sentence);
|
sanitize(sentence);
|
||||||
sentence.chop(std::distance(std::remove(sentence.begin(), sentence.end(), QChar::Tabulation), sentence.end()));
|
sentence.chop(std::distance(std::remove(sentence.begin(), sentence.end(), QChar::Tabulation), sentence.end()));
|
||||||
sentenceHistory.push_back(sentence);
|
sentenceHistory.push_back(sentence);
|
||||||
|
@ -33,43 +33,6 @@ std::string Escape(const std::string& text);
|
|||||||
|
|
||||||
namespace JSON
|
namespace JSON
|
||||||
{
|
{
|
||||||
/// Builds a one-character wide string from a numeric character code.
/// The code is cast to wchar_t, so values that do not fit in one wchar_t are truncated.
inline std::wstring UTF(int charCode)
{
	return std::wstring(1, static_cast<wchar_t>(charCode));
}

/// Decodes backslash escapes in `text` until an unescaped double quote or the end of the input.
///
/// Handled escapes: \b \f \n \r \t, \uXXXX (exactly four hex digits, converted with UTF()),
/// and any other escaped character, which is copied through unchanged (so \" \\ \/ work).
/// A "\u" that is not followed by four hex digits is treated like any other escape: a literal 'u'.
///
/// @param text  characters to decode; presumably the caller passes the text that follows an opening quote.
/// @return      the decoded text and the index just past the terminating quote, or text.size()
///              when the input ends before a quote is found.
template <typename C>
std::pair<std::basic_string<C>, int> Unescape(std::basic_string_view<C> text)
{
	// Value of a hexadecimal digit, or -1 when `digit` is not one. Written by hand because
	// isxdigit() is undefined for arguments outside the unsigned char range, which wide code units can be.
	const auto hexValue = [](C digit) -> int
	{
		if (digit >= '0' && digit <= '9') return digit - '0';
		if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10;
		if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10;
		return -1;
	};

	const int size = static_cast<int>(text.size());
	std::basic_string<C> unescaped;
	int i = 0;
	for (; i < size; ++i)
	{
		C ch = text[i];
		if (ch == '"') return { std::move(unescaped), i + 1 };
		if (ch != '\\')
		{
			unescaped.push_back(ch);
			continue;
		}

		// A backslash as the very last character has nothing to escape: drop it instead of reading past the end.
		if (i + 1 >= size) return { std::move(unescaped), size };
		ch = text[i + 1];

		// "\uXXXX" occupies indices i .. i + 5, so all six must lie inside the view before the digits are read.
		if (ch == 'u' && size - i >= 6)
		{
			int charCode = 0;
			bool allHex = true;
			for (int offset = 2; offset <= 5; ++offset)
			{
				const int digit = hexValue(text[i + offset]);
				if (digit < 0)
				{
					allHex = false;
					break;
				}
				charCode = charCode * 16 + digit;
			}
			if (allHex)
			{
				unescaped += UTF(charCode);
				i += 5; // the loop increment steps over the sixth character
				continue;
			}
		}

		switch (ch)
		{
		case 'b': unescaped.push_back('\b'); break;
		case 'f': unescaped.push_back('\f'); break;
		case 'n': unescaped.push_back('\n'); break;
		case 'r': unescaped.push_back('\r'); break;
		case 't': unescaped.push_back('\t'); break;
		default: unescaped.push_back(ch); break; // unknown escape: keep the escaped character itself
		}
		i += 1; // skip the escaped character; the loop increment skips the backslash
	}
	return { std::move(unescaped), i };
}
|
|
||||||
|
|
||||||
template <typename C>
|
template <typename C>
|
||||||
std::basic_string<C> Escape(std::basic_string<C> text)
|
std::basic_string<C> Escape(std::basic_string<C> text)
|
||||||
{
|
{
|
||||||
@ -93,6 +56,44 @@ namespace JSON
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Maps a character type to the routine that turns a numeric codepoint into text of that type.
/// The primary template is deliberately empty; only the specializations provide FromCodepoint.
template <typename C> struct UTF {};

/// wchar_t specialization of UTF.
template <> struct UTF<wchar_t>
{
	/// Returns a one-character wide string holding `codepoint` cast to wchar_t.
	/// Values that do not fit in one wchar_t are truncated by the cast.
	inline static std::wstring FromCodepoint(int codepoint) { return std::wstring(1, static_cast<wchar_t>(codepoint)); } // TODO: surrogate pairs
};

/// Decodes backslash escapes in `text` until an unescaped double quote or the end of the input.
///
/// Handled escapes: \b \f \n \r \t, \uXXXX (exactly four hex digits, converted with
/// UTF<C>::FromCodepoint), and any other escaped character, which is copied through
/// unchanged (so \" \\ \/ work). A "\u" that is not followed by four hex digits is treated
/// like any other escape: a literal 'u'.
///
/// @param text  characters to decode; presumably the caller passes the text that follows an opening quote.
/// @return      the decoded text and the index just past the terminating quote, or text.size()
///              when the input ends before a quote is found.
template <typename C>
std::pair<std::basic_string<C>, int> Unescape(std::basic_string_view<C> text)
{
	// Value of a hexadecimal digit, or -1 when `digit` is not one. Written by hand because
	// isxdigit() is undefined for arguments outside the unsigned char range, which wide code units can be.
	const auto hexValue = [](C digit) -> int
	{
		if (digit >= '0' && digit <= '9') return digit - '0';
		if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10;
		if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10;
		return -1;
	};

	const int size = static_cast<int>(text.size());
	std::basic_string<C> unescaped;
	int i = 0;
	for (; i < size; ++i)
	{
		C ch = text[i];
		if (ch == '"') return { std::move(unescaped), i + 1 };
		if (ch != '\\')
		{
			unescaped.push_back(ch);
			continue;
		}

		// A backslash as the very last character has nothing to escape: drop it instead of reading past the end.
		if (i + 1 >= size) return { std::move(unescaped), size };
		ch = text[i + 1];

		// "\uXXXX" occupies indices i .. i + 5, so all six must lie inside the view before the digits are read.
		if (ch == 'u' && size - i >= 6)
		{
			int codepoint = 0;
			bool allHex = true;
			for (int offset = 2; offset <= 5; ++offset)
			{
				const int digit = hexValue(text[i + offset]);
				if (digit < 0)
				{
					allHex = false;
					break;
				}
				codepoint = codepoint * 16 + digit;
			}
			if (allHex)
			{
				unescaped += UTF<C>::FromCodepoint(codepoint);
				i += 5; // the loop increment steps over the sixth character
				continue;
			}
		}

		switch (ch)
		{
		case 'b': unescaped.push_back('\b'); break;
		case 'f': unescaped.push_back('\f'); break;
		case 'n': unescaped.push_back('\n'); break;
		case 'r': unescaped.push_back('\r'); break;
		case 't': unescaped.push_back('\t'); break;
		default: unescaped.push_back(ch); break; // unknown escape: keep the escaped character itself
		}
		i += 1; // skip the escaped character; the loop increment skips the backslash
	}
	return { std::move(unescaped), i };
}
|
||||||
|
|
||||||
template <typename C>
|
template <typename C>
|
||||||
struct Value : private std::variant<std::monostate, std::nullopt_t, bool, double, std::basic_string<C>, std::vector<Value<C>>, std::unordered_map<std::basic_string<C>, Value<C>>>
|
struct Value : private std::variant<std::monostate, std::nullopt_t, bool, double, std::basic_string<C>, std::vector<Value<C>>, std::unordered_map<std::basic_string<C>, Value<C>>>
|
||||||
{
|
{
|
||||||
@ -119,10 +120,10 @@ namespace JSON
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename C>
|
template <typename C, int maxDepth = 25>
|
||||||
Value<C> Parse(std::basic_string_view<C> text, int64_t& i, int depth)
|
Value<C> Parse(std::basic_string_view<C> text, int64_t& i, int depth)
|
||||||
{
|
{
|
||||||
if (depth > 25) return {};
|
if (depth > maxDepth) return {};
|
||||||
C ch;
|
C ch;
|
||||||
auto SkipWhitespace = [&]
|
auto SkipWhitespace = [&]
|
||||||
{
|
{
|
||||||
@ -169,7 +170,7 @@ namespace JSON
|
|||||||
i += 1;
|
i += 1;
|
||||||
if (SkipWhitespace()) return {};
|
if (SkipWhitespace()) return {};
|
||||||
if (ch == ']') return i += 1, Value<C>(array);
|
if (ch == ']') return i += 1, Value<C>(array);
|
||||||
if (!array.emplace_back(Parse(text, i, depth + 1))) return {};
|
if (!array.emplace_back(Parse<C, maxDepth>(text, i, depth + 1))) return {};
|
||||||
if (SkipWhitespace()) return {};
|
if (SkipWhitespace()) return {};
|
||||||
if (ch == ']') return i += 1, Value<C>(array);
|
if (ch == ']') return i += 1, Value<C>(array);
|
||||||
if (ch != ',') return {};
|
if (ch != ',') return {};
|
||||||
@ -188,7 +189,7 @@ namespace JSON
|
|||||||
auto key = ExtractString();
|
auto key = ExtractString();
|
||||||
if (SkipWhitespace() || ch != ':') return {};
|
if (SkipWhitespace() || ch != ':') return {};
|
||||||
i += 1;
|
i += 1;
|
||||||
if (!(object[std::move(key)] = Parse(text, i, depth + 1))) return {};
|
if (!(object[std::move(key)] = Parse<C, maxDepth>(text, i, depth + 1))) return {};
|
||||||
if (SkipWhitespace()) return {};
|
if (SkipWhitespace()) return {};
|
||||||
if (ch == '}') return i += 1, Value<C>(object);
|
if (ch == '}') return i += 1, Value<C>(object);
|
||||||
if (ch != ',') return {};
|
if (ch != ',') return {};
|
||||||
|
@ -161,7 +161,7 @@ bool ProcessSentence(std::wstring& sentence, SentenceInfo sentenceInfo)
|
|||||||
if (cache && translationCache->size() > savedSize + 50) SaveCache();
|
if (cache && translationCache->size() > savedSize + 50) SaveCache();
|
||||||
|
|
||||||
for (int i = 0; i < translation.size(); ++i) if (translation[i] == '\r' && translation[i + 1] == '\n') translation[i] = 0x200b; // for some reason \r appears as newline - no need to double
|
for (int i = 0; i < translation.size(); ++i) if (translation[i] == '\r' && translation[i + 1] == '\n') translation[i] = 0x200b; // for some reason \r appears as newline - no need to double
|
||||||
if (!translation.empty()) (sentence += L"\n----\n") += translation;
|
if (!translation.empty()) (sentence += L"\x200b \n") += translation;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user