diff --git a/gui/PointerTable.h b/gui/PointerTable.h
new file mode 100644
index 0000000..b29c65e
--- /dev/null
+++ b/gui/PointerTable.h
@@ -0,0 +1,119 @@
+/* Copyright (C) 2010-2012 kaosu (qiupf2000@gmail.com)
+ * This file is part of the Interactive Text Hooker.
+
+ * Interactive Text Hooker is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+template <class T, unsigned int default_size>
+class PointerTable // Growable table of raw T* slots; slot `number` lives at table[number + 1], table[0] is a sentry.
+{
+public:
+ PointerTable()
+ {
+ assert((default_size & (default_size - 1)) == 0); // Initial size must be a power of two.
+ size = default_size;
+ table = new T*[size];
+ used = 0; // One past the highest occupied slot number (0 when empty).
+ next = 0; // Lowest free slot number; Append() stores there.
+ memset(table, 0, size * sizeof(T*));
+ }
+ ~PointerTable()
+ {
+ delete[] table; // Allocated with new[]; the pointed-to objects are not released here (see DeleteAll).
+ }
+ T* Set(unsigned int number, T* ptr) // Stores ptr in slot `number` (0 clears the slot); returns the previous value.
+ {
+ if (number >= size - 2) // Keep two spare entries: the sentry at [0] and a trailing zero.
+ {
+ unsigned int new_size = size;
+ while (number >= new_size - 2) new_size <<= 1;
+ Resize(new_size);
+ }
+ T* original = table[number + 1];
+ table[number + 1] = ptr;
+ if (ptr == 0) //Clear pointer.
+ {
+ if (number < next) next = number;
+ if (number == used - 1) //Last used position is cleared.
+ {
+ table[0] = (T*)1; // Non-zero sentry so the backward scan below stops at index 0.
+ for (used--; table[used] == 0; used--);
+ }
+ }
+ else //Set pointer.
+ {
+ __assume(number < size - 2); //Otherwise a resize operation is invoked.
+ if (number == next)
+ {
+ next++; //Next position is occupied.
+ for (next++; table[next]; next++); //There is always a zero in the end.
+ next--; //next is zero based but the table starts at one (zero is used as sentry).
+ }
+ if (number >= used) used = number + 1;
+ }
+ return original;
+ }
+ T* Get(unsigned int number) // Returns the pointer in slot `number`, or 0 when the slot is beyond the used range.
+ {
+ number++;
+ if (number <= used) return table[number];
+ else return 0;
+ }
+ T* operator [](unsigned int number) // Same lookup as Get().
+ {
+ number++;
+ if (number <= used) return table[number];
+ else return 0;
+ }
+ void Append(T* ptr) // Stores ptr in the lowest free slot.
+ {
+ Set(next,ptr);
+ }
+ void Resize(unsigned int new_size) // Grows the table to new_size entries, preserving existing slots.
+ {
+ assert(new_size > size);
+ assert((new_size & (new_size - 1)) == 0);
+ assert(new_size < 0x10000);
+
+ T** temp = new T*[new_size];
+ memcpy(temp, table, size * sizeof(T*));
+ memset(temp + size, 0, (new_size - size) * sizeof(T*));
+ delete[] table; // Matches the new[] allocation above / in the constructor.
+ size = new_size;
+ table = temp;
+ }
+ void DeleteAll() //Release all pointers on demand.
+ {
+ T* p;
+ next = 0;
+ while (used) // Walks from the highest used entry down; never touches the sentry at [0].
+ {
+ p = table[used];
+ if (p) delete p;
+ table[used] = 0;
+ used--;
+ }
+ }
+ void Reset() //Reset without releasing the pointers.
+ {
+ memset(table, 0, sizeof(T*) * (used + 1)); // +1 also clears the sentry entry at [0].
+ next = 0;
+ used = 0;
+
+ }
+ unsigned int size,next,used;
+ T** table;
+};
diff --git a/gui/ProcessWindow.cpp b/gui/ProcessWindow.cpp
new file mode 100644
index 0000000..c4b21b5
--- /dev/null
+++ b/gui/ProcessWindow.cpp
@@ -0,0 +1,140 @@
+#include "ProcessWindow.h"
+#include "resource.h"
+#include "ith/host/srv.h"
+#include "ith/host/hookman.h"
+#include "ProfileManager.h"
+#include "Profile.h"
+
+extern HookManager* man; // main.cpp
+extern ProfileManager* pfman; // ProfileManager.cpp
+
+ProcessWindow::ProcessWindow(HWND hDialog) : hDlg(hDialog) // Caches the dialog's child control handles and fills the process list.
+{
+ hbRefresh = GetDlgItem(hDlg, IDC_BUTTON1);
+ hbAttach = GetDlgItem(hDlg, IDC_BUTTON2);
+ hbDetach = GetDlgItem(hDlg, IDC_BUTTON3);
+ hbAddProfile = GetDlgItem(hDlg, IDC_BUTTON5);
+ hbRemoveProfile = GetDlgItem(hDlg, IDC_BUTTON6);
+ EnableWindow(hbAddProfile, FALSE); // Profile buttons start disabled; RefreshThreadWithPID() updates them later.
+ EnableWindow(hbRemoveProfile, FALSE);
+ hlProcess = GetDlgItem(hDlg, IDC_LIST1);
+ heOutput = GetDlgItem(hDlg, IDC_EDIT1);
+ ListView_SetExtendedListViewStyleEx(hlProcess, LVS_EX_FULLROWSELECT, LVS_EX_FULLROWSELECT);
+ InitProcessDlg(); // Create the list-view columns first...
+ RefreshProcess(); // ...then populate the rows.
+}
+
+void ProcessWindow::InitProcessDlg() // Inserts the two list-view columns: "PID" and "Name".
+{
+ LVCOLUMN lvc = {};
+ lvc.mask = LVCF_FMT | LVCF_TEXT | LVCF_WIDTH;
+ lvc.fmt = LVCFMT_RIGHT; // right-aligned column
+ lvc.cx = 40;
+ lvc.pszText = L"PID";
+ ListView_InsertColumn(hlProcess, 0, &lvc);
+ lvc.cx = 100;
+ lvc.fmt = LVCFMT_LEFT; // left-aligned column
+ lvc.pszText = L"Name";
+ ListView_InsertColumn(hlProcess, 1, &lvc);
+}
+
+void ProcessWindow::RefreshProcess() // Rebuilds the list view with one row (PID, image name) per process that can be opened.
+{
+ ListView_DeleteAllItems(hlProcess);
+ LVITEM item = {}; // iItem stays 0, so every row is inserted at the top of the list.
+ item.mask = LVIF_TEXT | LVIF_PARAM | LVIF_STATE;
+ DWORD idProcess[1024], cbNeeded;
+ WCHAR path[MAX_PATH];
+
+ if (EnumProcesses(idProcess, sizeof(idProcess), &cbNeeded))
+ {
+ DWORD len = cbNeeded / sizeof(DWORD);
+ for (DWORD i = 0; i < len; ++i)
+ {
+ DWORD pid = idProcess[i];
+ UniqueHandle hProcess(OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid));
+ if (hProcess) // Processes that cannot be opened are skipped.
+ {
+ if (GetProcessImageFileName(hProcess.get(), path, MAX_PATH))
+ {
+ WCHAR buffer[256];
+ std::swprintf(buffer, 256, L"%lu", pid); // Bounded write; DWORD is unsigned long.
+ PWCHAR name = wcsrchr(path, L'\\'); name = name ? name + 1 : path; // Fall back to the whole path when it has no backslash.
+ item.pszText = buffer;
+ item.lParam = pid; // The PID is stored as item data; GetSelectedPID() reads it back.
+ ListView_InsertItem(hlProcess, &item);
+ ListView_SetItemText(hlProcess, item.iItem, 1, name);
+ }
+ }
+ }
+ }
+}
+
+void ProcessWindow::AttachProcess() // Injects into the selected process and refreshes the buttons on success.
+{
+ const DWORD selectedPid = GetSelectedPID();
+ const bool injected = IHF_InjectByPID(selectedPid) != -1; // -1 is treated as injection failure.
+ if (injected) RefreshThreadWithPID(selectedPid, true);
+}
+
+void ProcessWindow::DetachProcess() // Detaches from the selected process and refreshes the buttons on success.
+{
+ const DWORD selectedPid = GetSelectedPID();
+ const bool detached = IHF_ActiveDetachProcess(selectedPid) == 0; // 0 is treated as success.
+ if (detached) RefreshThreadWithPID(selectedPid, false);
+}
+
+void ProcessWindow::AddCurrentToProfile() // Creates (or reuses) a profile for the selected process and refreshes the buttons.
+{
+ DWORD pid = GetSelectedPID();
+ auto path = GetProcessPath(pid);
+ if (!path.empty()) // An empty path means the process path could not be obtained; do nothing.
+ {
+ pfman->AddProfile(path, pid); // The returned pointer is not needed here.
+ pfman->FindProfileAndUpdateHookAddresses(pid, path);
+ RefreshThread(ListView_GetSelectionMark(hlProcess));
+ }
+}
+
+void ProcessWindow::RemoveCurrentFromProfile()
+{
+ // Deletes the profile keyed by the selected process's path, then refreshes the buttons.
+ const auto exePath = GetProcessPath(GetSelectedPID());
+ if (exePath.empty())
+ return;
+ pfman->DeleteProfile(exePath);
+ const int selectedRow = ListView_GetSelectionMark(hlProcess);
+ RefreshThread(selectedRow);
+}
+
+void ProcessWindow::RefreshThread(int index) // Updates the button states for the process shown in list row `index`.
+{
+ LVITEM item = {};
+ item.mask = LVIF_PARAM;
+ item.iItem = index;
+ ListView_GetItem(hlProcess, &item); // Result is not checked; on failure lParam keeps its zero initial value.
+ DWORD pid = item.lParam; // RefreshProcess() stored the PID in lParam.
+ bool isAttached = man->GetProcessRecord(pid) != NULL; // A process record existing is treated as "attached".
+ RefreshThreadWithPID(pid, isAttached);
+}
+
+void ProcessWindow::RefreshThreadWithPID(DWORD pid, bool isAttached) // Enables/disables the attach, detach and profile buttons for `pid`.
+{
+ EnableWindow(hbDetach, isAttached);
+ EnableWindow(hbAttach, !isAttached);
+ auto path = GetProcessPath(pid);
+ bool hasProfile = !path.empty() && pfman->HasProfile(path);
+ EnableWindow(hbAddProfile, isAttached && !hasProfile); // Adding requires an attached process without a profile.
+ EnableWindow(hbRemoveProfile, hasProfile);
+ if (pid == GetCurrentProcessId()) // Never offer to attach to this process itself.
+ EnableWindow(hbAttach, FALSE);
+}
+
+DWORD ProcessWindow::GetSelectedPID() // Returns the PID stored in the list view's selection-mark row.
+{
+ LVITEM item={}; // Zero-initialised, so lParam is 0 if the lookup below fails.
+ item.mask = LVIF_PARAM;
+ item.iItem = ListView_GetSelectionMark(hlProcess);
+ ListView_GetItem(hlProcess, &item); // Result is not checked.
+ return item.lParam;
+}
diff --git a/gui/ProcessWindow.h b/gui/ProcessWindow.h
new file mode 100644
index 0000000..8b6a546
--- /dev/null
+++ b/gui/ProcessWindow.h
@@ -0,0 +1,22 @@
+#pragma once
+#include "ITH.h"
+
+class ProcessWindow // Wraps the process dialog: process list view plus attach/detach/profile buttons.
+{
+public:
+ ProcessWindow(HWND hDialog); // Looks up the child controls of hDialog and fills the list.
+ void InitProcessDlg(); // Creates the list-view columns.
+ void RefreshProcess(); // Re-enumerates processes into the list view.
+ void AttachProcess(); // Injects into the selected process.
+ void DetachProcess(); // Detaches from the selected process.
+ void AddCurrentToProfile(); // Adds a profile for the selected process.
+ void RemoveCurrentFromProfile(); // Deletes the profile of the selected process.
+ void RefreshThread(int index); // Updates button states for list row `index`.
+private:
+ void RefreshThreadWithPID(DWORD pid, bool isAttached);
+ DWORD GetSelectedPID();
+ HWND hDlg; // The dialog itself.
+ HWND hlProcess; // Process list view (IDC_LIST1).
+ HWND hbRefresh,hbAttach,hbDetach,hbAddProfile,hbRemoveProfile; // Buttons.
+ HWND heOutput; // Edit control (IDC_EDIT1).
+};
diff --git a/gui/Profile.cpp b/gui/Profile.cpp
new file mode 100644
index 0000000..d25a6b5
--- /dev/null
+++ b/gui/Profile.cpp
@@ -0,0 +1,341 @@
+/* Copyright (C) 2010-2012 kaosu (qiupf2000@gmail.com)
+ * This file is part of the Interactive Text Hooker.
+
+ * Interactive Text Hooker is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "ITH.h"
+#include "ith/host/srv.h"
+#include "ith/host/hookman.h"
+#include "ith/common/types.h"
+#include "ith/common/const.h"
+#include "Profile.h"
+#include "utility.h"
+
+Profile::Profile(const std::wstring& title) : // Creates an empty profile with the given title and no selected thread.
+select_index(-1), // select_index is a WORD, so -1 is stored as 0xFFFF; XmlWriteProfile() omits the Select node for that value.
+title(title)
+{}
+
+std::vector<thread_ptr>::const_iterator Profile::FindThreadProfile(const ThreadParameter& tp) const
+{
+ // Returns the first stored thread profile matching tp (hook address, return address, split value), or threads.end().
+ auto thread_profile = std::find_if(threads.begin(), threads.end(),
+ [&tp](const thread_ptr& thread_profile) -> bool
+ {
+ if (thread_profile->HookAddress() != tp.hook)
+ return false;
+ DWORD t1 = thread_profile->Return();
+ DWORD t2 = tp.retn;
+ if (thread_profile->Flags() & THREAD_MASK_RETN) // With the mask flag set only the low 16 bits are compared.
+ {
+ t1 &= 0xFFFF;
+ t2 &= 0xFFFF;
+ }
+ if (t1 != t2)
+ return false;
+ t1 = thread_profile->Split();
+ t2 = tp.spl;
+ if (thread_profile->Flags() & THREAD_MASK_SPLIT) // Same low-16-bit comparison for the split value.
+ {
+ t1 &= 0xFFFF;
+ t2 &= 0xFFFF;
+ }
+ return t1 == t2;
+ });
+ return thread_profile;
+
+const std::vector<hook_ptr>& Profile::Hooks() const // Read-only access to the stored hook profiles.
+{
+ return hooks;
+}
+
+const std::vector<thread_ptr>& Profile::Threads() const // Read-only access to the stored thread profiles.
+{
+ return threads;
+}
+
+const std::vector<link_ptr>& Profile::Links() const // Read-only access to the stored link profiles.
+{
+ return links;
+}
+
+bool Profile::XmlReadProfile(pugi::xml_node profile) // Loads hooks, threads, links and the selected index from a <Profile> node.
+{
+ auto hooks_node = profile.child(L"Hooks");
+ auto threads_node = profile.child(L"Threads");
+ auto links_node = profile.child(L"Links");
+ if (hooks_node && !XmlReadProfileHook(hooks_node)) // Each section is optional, but a present section must parse.
+ return false;
+ if (threads_node && !XmlReadProfileThread(threads_node))
+ return false;
+ if (links_node && !XmlReadProfileLink(links_node))
+ return false;
+ auto select_node = profile.child(L"Select");
+ if (select_node)
+ {
+ auto thread_index = select_node.attribute(L"ThreadIndex");
+ if (!thread_index)
+ return false;
+ DWORD tmp_select = std::stoul(thread_index.value(), NULL, 16); // Parsed as hexadecimal; std::stoul throws on non-numeric text (not caught here).
+ select_index = tmp_select & 0xFFFF;
+ }
+ return true;
+}
+
+bool Profile::XmlReadProfileHook(pugi::xml_node hooks_node) // Reads every <Hook> child; returns false on the first malformed element.
+{
+ for (auto hook = hooks_node.begin(); hook != hooks_node.end(); ++hook)
+ {
+ std::wstring name = hook->name();
+ if (name.empty() || name.compare(L"Hook") != 0) // Only <Hook> elements are accepted.
+ return false;
+ auto type = hook->attribute(L"Type");
+ if (!type || type.empty())
+ return false;
+ auto code = hook->attribute(L"Code");
+ if (!code)
+ return false;
+ std::wstring code_value = code.value();
+ HookParam hp = {};
+ switch (type.value()[0])
+ {
+ case L'H': // Only type "H" is supported; the code must start with "/H" or "/h".
+ if (code_value[0] != L'/')
+ return false;
+ if (code_value[1] != L'H' && code_value[1] != L'h')
+ return false;
+ if (Parse(code_value.substr(2), hp)) // A code that fails to parse is skipped silently rather than reported.
+ {
+ auto name = hook->attribute(L"Name"); // Shadows the element-name string declared above.
+ if (!name || name.empty())
+ AddHook(hp, L"");
+ else
+ AddHook(hp, name.value());
+ }
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+bool Profile::XmlReadProfileThread(pugi::xml_node threads_node) // Reads every <Thread> child; returns false on the first malformed element.
+{
+ std::wstring hook_name_buffer; // Unused in this function.
+ for (auto thread = threads_node.begin(); thread != threads_node.end(); ++thread)
+ {
+ std::wstring name = thread->name();
+ if (name.empty() || name.compare(L"Thread") != 0)
+ return false;
+ auto hook_name = thread->attribute(L"HookName"); // HookName, Context, SubContext and Mask are all required.
+ if (!hook_name)
+ return false;
+ auto context = thread->attribute(L"Context");
+ if (!context)
+ return false;
+ auto sub_context = thread->attribute(L"SubContext");
+ if (!sub_context)
+ return false;
+ auto mask = thread->attribute(L"Mask");
+ if (!mask)
+ return false;
+ DWORD mask_tmp = std::stoul(mask.value(), NULL, 16); // All numeric attributes are hexadecimal; std::stoul throws on bad text (not caught here).
+ auto comment = thread->attribute(L"Comment"); // Optional attribute.
+ auto retn = std::stoul(context.value(), NULL, 16);
+ WORD hm_index = 0;
+ auto hook_addr = 0; // Deduced as int; the hook address is not stored in the XML.
+ auto split = std::stoul(sub_context.value(), NULL, 16);
+ WORD flags = mask_tmp & 0xFFFF;
+ auto tp = new ThreadProfile(hook_name.value(), retn, split, hook_addr, hm_index, flags,
+ comment.value());
+ AddThread(thread_ptr(tp)); // Ownership passes to the thread_ptr immediately.
+ }
+ return true;
+}
+
+bool Profile::XmlReadProfileLink(pugi::xml_node links_node) // Reads every <Link> child; returns false on the first malformed element.
+{
+ for (auto link = links_node.begin(); link != links_node.end(); ++link)
+ {
+ std::wstring name = link->name();
+ if (name.empty() || name.compare(L"Link") != 0)
+ return false;
+ auto from = link->attribute(L"From");
+ if (!from)
+ return false;
+ DWORD link_from = std::stoul(from.value(), NULL, 16); // Hexadecimal; std::stoul throws on bad text (not caught here).
+ auto to = link->attribute(L"To");
+ if (!to)
+ return false;
+ DWORD link_to = std::stoul(to.value(), NULL, 16);
+ auto lp = new LinkProfile(link_from & 0xFFFF, link_to & 0xFFFF); // Indices are truncated to 16 bits.
+ AddLink(link_ptr(lp));
+ }
+ return true;
+}
+
+bool Profile::XmlWriteProfile(pugi::xml_node profile_node) // Serialises this profile under profile_node; always returns true.
+{
+ if (!hooks.empty()) // Empty sections are omitted from the XML.
+ {
+ auto node = profile_node.append_child(L"Hooks");
+ XmlWriteProfileHook(node);
+ }
+ if (!threads.empty())
+ {
+ auto node = profile_node.append_child(L"Threads");
+ XmlWriteProfileThread(node);
+ }
+ if (!links.empty())
+ {
+ auto node = profile_node.append_child(L"Links");
+ XmlWriteProfileLink(node);
+ }
+ if (select_index != 0xFFFF) // 0xFFFF means "no thread selected".
+ {
+ auto node = profile_node.append_child(L"Select");
+ node.append_attribute(L"ThreadIndex") = ToHexString(select_index).c_str(); // Hex, because XmlReadProfile() parses it with base 16.
+ }
+ return true;
+}
+
+bool Profile::XmlWriteProfileHook(pugi::xml_node hooks_node) // Appends one <Hook> element per stored hook; always returns true.
+{
+ for (auto hook = hooks.begin(); hook != hooks.end(); ++hook)
+ {
+ auto hook_node = hooks_node.append_child(L"Hook");
+ hook_node.append_attribute(L"Type") = L"H"; // The only type XmlReadProfileHook() accepts.
+ hook_node.append_attribute(L"Code") = GetCode((*hook)->HP()).c_str();
+ if (!(*hook)->Name().empty()) // Name is optional.
+ hook_node.append_attribute(L"Name") = (*hook)->Name().c_str();
+ }
+ return true;
+}
+
+bool Profile::XmlWriteProfileThread(pugi::xml_node threads_node) // Appends one <Thread> element per stored thread profile.
+{
+ for (auto thread = threads.begin(); thread != threads.end(); ++thread)
+ {
+ const std::wstring& name = (*thread)->HookName();
+ if (name.empty()) // Aborts on a nameless thread; elements already appended stay in the document.
+ return false;
+ auto node = threads_node.append_child(L"Thread");
+ node.append_attribute(L"HookName") = name.c_str();
+ node.append_attribute(L"Mask") = ToHexString((*thread)->Flags() & 3).c_str(); // Only the two THREAD_MASK_* bits are saved.
+ node.append_attribute(L"SubContext") = ToHexString((*thread)->Split()).c_str();
+ node.append_attribute(L"Context") = ToHexString((*thread)->Return()).c_str();
+ if (!(*thread)->Comment().empty()) // Comment is optional.
+ node.append_attribute(L"Comment") = (*thread)->Comment().c_str();
+ }
+ return true;
+}
+
+bool Profile::XmlWriteProfileLink(pugi::xml_node links_node) // Appends one <Link From To> element per stored link; always returns true.
+{
+ for (auto link = links.begin(); link != links.end(); ++link)
+ {
+ auto node = links_node.append_child(L"Link");
+ node.append_attribute(L"From") = ToHexString((*link)->FromIndex()).c_str();
+ node.append_attribute(L"To") = ToHexString((*link)->ToIndex()).c_str();
+ }
+ return true;
+}
+
+void Profile::Clear() // Empties the profile: no hooks, threads or links, blank title, nothing selected.
+{
+ links.clear();
+ threads.clear();
+ hooks.clear();
+ select_index = 0xFFFF; // Same value the WORD member receives from -1.
+ title.clear();
+}
+
+int Profile::AddHook(const HookParam& hp, const std::wstring& name) // Adds a hook unless an equal one exists; returns its index either way.
+{
+ //if (hook_count == 4) return;
+ auto it = std::find_if(hooks.begin(), hooks.end(), [&hp](hook_ptr& hook)
+ {
+ return hook->HP().addr == hp.addr && // Hooks are considered equal when addr, module and function all match.
+ hook->HP().module == hp.module &&
+ hook->HP().function == hp.function;
+ });
+ if (it != hooks.end())
+ return it - hooks.begin(); // Existing entry is kept; `name` is ignored in that case.
+ hooks.emplace_back(new HookProfile(hp, name));
+ return hooks.size() - 1;
+}
+
+// Adds the thread profile unless an equal one is already stored; returns the index of the stored entry either way.
+int Profile::AddThread(thread_ptr tp)
+{
+ auto it = std::find_if(threads.begin(), threads.end(), [&tp](thread_ptr& thread)
+ {
+ return thread->HookName().compare(tp->HookName()) == 0 && // Equal when hook name, return address and split all match.
+ thread->Return() == tp->Return() &&
+ thread->Split() == tp->Split();
+ });
+ if (it != threads.end())
+ return it - threads.begin(); // Duplicate: tp is destroyed when this function returns.
+ threads.push_back(std::move(tp));
+ return threads.size() - 1;
+}
+
+int Profile::AddLink(link_ptr lp) // Adds the link unless the same From/To pair is already stored; returns its index either way.
+{
+ auto it = std::find_if(links.begin(), links.end(), [&lp] (link_ptr& link)
+ {
+ return link->FromIndex() == lp->FromIndex() &&
+ link->ToIndex() == lp->ToIndex();
+ });
+ if (it != links.end())
+ return it - links.begin(); // Duplicate: lp is destroyed when this function returns.
+ links.push_back(std::move(lp));
+ return links.size() - 1;
+}
+
+void Profile::RemoveHook(DWORD index) // Erases the hook at `index`; out-of-range indices are ignored.
+{
+ if (index >= hooks.size()) return; // DWORD is unsigned, so only the upper bound needs checking.
+ hooks.erase(hooks.begin() + index);
+}
+
+void Profile::RemoveThread(DWORD index) // Erases the thread at `index`, drops links that reference it and fixes up the selection.
+{
+ if (index >= 0 && index < threads.size())
+ {
+ links.erase(std::remove_if(links.begin(), links.end(), [index](link_ptr& link)
+ {
+ return link->FromIndex() == index + 1 || link->ToIndex() == index + 1; // NOTE(review): compares against index + 1 — confirm link indices are 1-based.
+ }), links.end());
+ if (select_index == index)
+ select_index = -1; // The selected thread is going away: back to "no selection" (0xFFFF).
+ threads.erase(threads.begin() + index);
+ if (select_index != 0xFFFF && index < select_index) // Never decrement the 0xFFFF "no selection" sentinel.
+ select_index--;
+ }
+}
+
+void Profile::RemoveLink(DWORD index) // Erases the link at `index`; out-of-range indices are ignored.
+{
+ if (index >= links.size()) return; // DWORD is unsigned, so only the upper bound needs checking.
+ links.erase(links.begin() + index);
+}
+
+const std::wstring& Profile::Title() const // Returns the profile title (may be empty).
+{
+ return title;
+}
diff --git a/gui/Profile.h b/gui/Profile.h
new file mode 100644
index 0000000..8908163
--- /dev/null
+++ b/gui/Profile.h
@@ -0,0 +1,125 @@
+/* Copyright (C) 2010-2012 kaosu (qiupf2000@gmail.com)
+ * This file is part of the Interactive Text Hooker.
+
+ * Interactive Text Hooker is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include "ITH.h"
+#include "ith/common/types.h" // HookParam
+
+struct ThreadParameter;
+
+#define THREAD_MASK_RETN 1
+#define THREAD_MASK_SPLIT 2
+
+class HookProfile // Immutable pair of hook parameters and a display name.
+{
+ HookParam hp;
+ std::wstring name; // May be empty.
+public:
+ HookProfile(const HookParam& hp, const std::wstring& name):
+ hp(hp),
+ name(name)
+ {}
+ const HookParam& HP() const { return hp; };
+ const std::wstring& Name() const { return name; };
+};
+
+class ThreadProfile // Describes one text thread: owning hook name, return/split values and matching flags.
+{
+ std::wstring hook_name;
+ DWORD retn;
+ DWORD split;
+ DWORD hook_addr; // Mutable through HookAddress().
+ WORD hm_index, flags; // flags holds THREAD_MASK_* bits.
+ std::wstring comment;
+public:
+ ThreadProfile(const std::wstring& hook_name,
+ DWORD retn,
+ DWORD split,
+ DWORD hook_addr,
+ WORD hm_index,
+ WORD flags,
+ const std::wstring& comment) :
+ hook_name(hook_name),
+ retn(retn),
+ split(split),
+ hook_addr(hook_addr),
+ hm_index(hm_index),
+ flags(flags),
+ comment(comment)
+ {
+ }
+ const std::wstring& HookName() const { return hook_name; }
+ const std::wstring& Comment() const { return comment; }
+ DWORD Return() const { return retn; }
+ DWORD Split() const { return split; }
+ DWORD& HookAddress() { return hook_addr; } // Non-const accessor: callers assign through the returned reference.
+ WORD& HookManagerIndex() { return hm_index; } // Non-const accessor returning a writable reference.
+ WORD Flags() const { return flags; }
+};
+
+class LinkProfile // Immutable link between two thread-profile indices.
+{
+ WORD from_index, to_index;
+public:
+ LinkProfile(WORD from_index, WORD to_index):
+ from_index(from_index),
+ to_index(to_index)
+ {}
+ WORD FromIndex() const { return from_index; }
+ WORD ToIndex() const { return to_index; }
+};
+
+typedef std::unique_ptr<HookProfile> hook_ptr; // Owning pointers stored in Profile's vectors.
+typedef std::unique_ptr<ThreadProfile> thread_ptr;
+typedef std::unique_ptr<LinkProfile> link_ptr;
+
+class Profile // Per-game profile: hooks to insert, threads to recognise, links between threads, selected thread.
+{
+public:
+ Profile(const std::wstring& title);
+ bool XmlReadProfile(pugi::xml_node profile_node); // Returns false when a present section is malformed.
+ bool XmlWriteProfile(pugi::xml_node profile_node);
+ int AddHook(const HookParam& hp, const std::wstring& name); // The Add* functions return the index of the new or already-present entry.
+ int AddThread(thread_ptr tp);
+ int AddLink(link_ptr lp);
+ void Clear(); // Also blanks the title.
+ const std::vector<hook_ptr>& Hooks() const;
+ const std::vector<thread_ptr>& Threads() const;
+ const std::vector<link_ptr>& Links() const;
+ const std::wstring& Title() const;
+ std::vector<thread_ptr>::const_iterator FindThreadProfile(const ThreadParameter& tp) const;
+ WORD& SelectedIndex() { return select_index; } // Writable reference; 0xFFFF means no thread is selected.
+
+private:
+ void RemoveLink(DWORD index);
+ void RemoveHook(DWORD index);
+ void RemoveThread(DWORD index);
+
+ bool XmlReadProfileHook(pugi::xml_node hooks_node);
+ bool XmlReadProfileThread(pugi::xml_node threads_node);
+ bool XmlReadProfileLink(pugi::xml_node links_node);
+ bool XmlWriteProfileHook(pugi::xml_node hooks_node);
+ bool XmlWriteProfileThread(pugi::xml_node threads_node);
+ bool XmlWriteProfileLink(pugi::xml_node links_node);
+
+ std::wstring title;
+ std::vector<hook_ptr> hooks;
+ std::vector<thread_ptr> threads;
+ std::vector<link_ptr> links;
+
+ WORD select_index;
+};
diff --git a/gui/ProfileManager.cpp b/gui/ProfileManager.cpp
new file mode 100644
index 0000000..fa65901
--- /dev/null
+++ b/gui/ProfileManager.cpp
@@ -0,0 +1,352 @@
+#include "ProfileManager.h"
+#include "Profile.h"
+#include "ith/host/srv.h"
+#include "ith/host/hookman.h"
+#include "ith/common/types.h"
+#include "ith/common/const.h"
+
+extern HookManager* man; // main.cpp
+extern LONG auto_inject, auto_insert, inject_delay; // main.cpp
+extern LONG insert_delay, process_time; // main.cpp
+bool MonitorFlag; // Loop condition of MonitorThread(); zero-initialised here, it is not assigned anywhere in this file.
+ProfileManager* pfman; // Not assigned anywhere in this file.
+
+DWORD WINAPI MonitorThread(LPVOID lpThreadParameter);
+void AddHooksToProfile(Profile& pf, const ProcessRecord& pr);
+void AddThreadsToProfile(Profile& pf, const ProcessRecord& pr, DWORD pid);
+DWORD AddThreadToProfile(Profile& pf, const ProcessRecord& pr, TextThread& thread);
+void MakeHookRelative(const ProcessRecord& pr, HookParam& hp);
+std::wstring GetHookNameByAddress(const ProcessRecord& pr, DWORD hook_address);
+void GetHookNameToAddressMap(const ProcessRecord& pr, std::map<std::wstring, DWORD>& hookNameToAddress);
+
+ProfileManager::ProfileManager(): // Starts the monitor thread and loads ITH_Profile.xml.
+hMonitorThread(IthCreateThread(MonitorThread, 0)) // NOTE(review): the thread is created before LoadProfile() runs — confirm that is safe.
+{
+ LoadProfile();
+}
+ProfileManager::~ProfileManager() // Saves all profiles to ITH_Profile.xml.
+{
+ SaveProfile();
+ WaitForSingleObject(hMonitorThread.get(), 0); // NOTE(review): a timeout of 0 only polls and returns at once; it does not wait for the thread to exit.
+}
+
+Profile* ProfileManager::GetProfile(DWORD pid)
+{
+ // Looks up the profile keyed by the process's executable path; NULL when the path or the profile is missing.
+ const std::wstring exePath = GetProcessPath(pid);
+ if (exePath.empty())
+ return NULL;
+ auto found = profile_tree.find(exePath);
+ if (found == profile_tree.end())
+ return NULL;
+ return found->second.get();
+}
+
+bool ProfileManager::AddProfile(pugi::xml_node game) // Builds a profile from a <Game> node and registers it under its File/Path.
+{
+ auto file = game.child(L"File");
+ auto profile = game.child(L"Profile");
+ if (!file || !profile) // Both children are required.
+ return false;
+ auto path = file.attribute(L"Path");
+ if (!path)
+ return false;
+ auto profile_title = game.attribute(L"Title"); // Title is optional.
+ auto title = profile_title ? profile_title.value() : L"";
+ profile_ptr pf(new Profile(title)); // Owned from the start so a failed read below no longer leaks it.
+ if (!pf->XmlReadProfile(profile))
+ return false;
+ AddProfile(path.value(), std::move(pf));
+ return true;
+}
+
+Profile* ProfileManager::AddProfile(const std::wstring& path, DWORD pid) // Returns the profile for `path`, creating an empty one if needed.
+{
+ CSLock lock(cs);
+ auto& pf = profile_tree[path]; // operator[] inserts a null entry when the path is new.
+ if (!pf)
+ {
+ std::wstring title = GetProcessTitle(pid);
+ pf.reset(new Profile(title));
+ }
+ return pf.get();
+}
+
+Profile* ProfileManager::AddProfile(const std::wstring& path, profile_ptr new_profile) // Registers new_profile under `path` unless one already exists.
+{
+ CSLock lock(cs);
+ auto& pf = profile_tree[path];
+ if (!pf) // An existing profile wins; new_profile is then destroyed on return.
+ pf.swap(new_profile);
+ return pf.get();
+}
+
+void ProfileManager::WriteProfileXml(const std::wstring& path, Profile& pf, pugi::xml_node root) // Appends one <Game> element for `pf` under root.
+{
+ auto game = root.append_child(L"Game");
+ auto file_node = game.append_child(L"File");
+ file_node.append_attribute(L"Path") = path.c_str();
+ auto profile_node = game.append_child(L"Profile");
+ pf.XmlWriteProfile(profile_node);
+ if (!pf.Title().empty()) // The Title attribute is only written for non-empty titles.
+ {
+ if (!game.attribute(L"Title"))
+ game.append_attribute(L"Title");
+ game.attribute(L"Title") = pf.Title().c_str();
+ }
+}
+
+void ProfileManager::LoadProfile() // Reads ITH_Profile.xml and registers every <Game> entry; silently does nothing on any failure.
+{
+ pugi::xml_document doc;
+ UniqueHandle hFile(IthCreateFile(L"ITH_Profile.xml", GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING));
+ if (hFile.get() == INVALID_HANDLE_VALUE)
+ return;
+ DWORD size = GetFileSize(hFile.get(), NULL);
+ if (size == INVALID_FILE_SIZE || size == 0) // Size query failed or nothing to parse.
+ return;
+ std::unique_ptr<char[]> buffer(new char[size]);
+ if (!ReadFile(hFile.get(), buffer.get(), size, &size, NULL) || !doc.load_buffer(buffer.get(), size)) // size is updated to the bytes actually read.
+ return;
+ auto root = doc.root().child(L"ITH_Profile");
+ if (!root)
+ return;
+ for (auto game = root.begin(); game != root.end(); ++game)
+ AddProfile(*game); // Malformed <Game> entries are skipped (return value ignored).
+}
+
+void ProfileManager::SaveProfile() // Serialises every profile into ITH_Profile.xml, replacing the file.
+{
+ pugi::xml_document doc;
+ auto root = doc.append_child(L"ITH_Profile");
+ for (auto it = profile_tree.begin(); it != profile_tree.end(); ++it) { // NOTE(review): iterates profile_tree without taking cs.
+ auto& path = it->first;
+ auto& profile = it->second;
+ WriteProfileXml(path, *profile, root);
+ }
+ UniqueHandle hFile(IthCreateFile(L"ITH_Profile.xml", GENERIC_WRITE, 0, CREATE_ALWAYS));
+ if (hFile.get() != INVALID_HANDLE_VALUE) // Nothing is written if the file cannot be created.
+ {
+ FileWriter fw(hFile.get());
+ doc.save(fw);
+ }
+}
+
+void ProfileManager::DeleteProfile(const std::wstring& path) // Removes the profile registered under `path`, if any.
+{
+ CSLock lock(cs);
+ profile_tree.erase(path); // Erase by key is a no-op for a missing path; erase(find(path)) would erase end() (undefined behaviour).
+}
+
+void ProfileManager::FindProfileAndUpdateHookAddresses(DWORD pid, const std::wstring& path) // Fills each thread profile's hook address from the live hooks of `pid`.
+{
+ if (path.empty())
+ return;
+ auto it = profile_tree.find(path);
+ if (it == profile_tree.end())
+ return;
+ auto& pf = it->second;
+ const ProcessRecord* pr = man->GetProcessRecord(pid);
+ if (pr == NULL) // Nothing to resolve when no process record exists for pid.
+ return;
+ // hook name -> hook address
+ std::map<std::wstring, DWORD> hookNameToAddress;
+ GetHookNameToAddressMap(*pr, hookNameToAddress);
+ for (auto thread_profile = pf->Threads().begin(); thread_profile != pf->Threads().end();
+ ++thread_profile)
+ {
+ auto it = hookNameToAddress.find((*thread_profile)->HookName()); // Shadows the outer `it`.
+ if (it != hookNameToAddress.end())
+ (*thread_profile)->HookAddress() = it->second; // Threads whose hook name is not live keep their old address.
+ }
+}
+
+void GetHookNameToAddressMap(const ProcessRecord& pr, // Maps each active hook's name (read from the target process) to its address.
+ std::map<std::wstring, DWORD>& hookNameToAddress)
+{
+ WaitForSingleObject(pr.hookman_mutex, 0); // NOTE(review): zero timeout and unchecked result — the mutex may not actually be held below.
+ auto hooks = (const Hook*)pr.hookman_map;
+ for (DWORD i = 0; i < MAX_HOOK; ++i)
+ {
+ if (hooks[i].Address() == 0) // Address 0 marks an unused hook slot.
+ continue;
+ auto& hook = hooks[i];
+ std::unique_ptr<WCHAR[]> name(new WCHAR[hook.NameLength() * 2]()); // Zero-filled so the string is terminated even if the bytes read are not.
+ if (ReadProcessMemory(pr.process_handle, hook.Name(), name.get(), hook.NameLength() * 2, NULL))
+ hookNameToAddress[name.get()] = hook.Address();
+ }
+ ReleaseMutex(pr.hookman_mutex);
+}
+
+bool ProfileManager::HasProfile(const std::wstring& path) // True when a profile is registered under `path`.
+{
+ return profile_tree.count(path) != 0;
+}
+
+DWORD ProfileManager::ProfileCount() // Number of registered profiles.
+{
+ return static_cast<DWORD>(profile_tree.size());
+}
+
+DWORD WINAPI InjectThread(LPVOID lpThreadParameter) // Thread entry: injects into the PID passed as the parameter, then inserts the profile's hooks.
+{
+ DWORD pid = (DWORD)lpThreadParameter; // The PID travels in the pointer-sized thread parameter.
+ Sleep(inject_delay);
+ if (man == NULL)
+ return 0;
+ DWORD status = IHF_InjectByPID(pid);
+ if (!auto_insert) // Hook insertion is optional.
+ return status;
+ if (status == -1) // Injection failed.
+ return status;
+ Sleep(insert_delay);
+ const Profile* pf = pfman->GetProfile(pid);
+ if (pf)
+ {
+ SendParam sp; // Unused in this function.
+ sp.type = 0;
+ for (auto hp = pf->Hooks().begin(); hp != pf->Hooks().end(); ++hp)
+ IHF_InsertHook(pid, const_cast<HookParam*>(&(*hp)->HP()), (*hp)->Name().c_str()); // HP() returns a const reference; the API takes a non-const pointer.
+ }
+ return status;
+}
+
+DWORD WINAPI MonitorThread(LPVOID lpThreadParameter) // Thread entry: polls running processes and starts InjectThread for those with a profile.
+{
+ while (MonitorFlag) // NOTE(review): plain bool shared across threads; the loop never starts if the flag is still false when the thread runs.
+ {
+ DWORD aProcesses[1024], cbNeeded, cProcesses;
+ if (!EnumProcesses(aProcesses, sizeof(aProcesses), &cbNeeded))
+ break;
+ cProcesses = cbNeeded / sizeof(DWORD);
+ for (size_t i = 0; i < cProcesses; ++i)
+ {
+ Sleep(process_time); // Sleeps once per process entry.
+ if (!auto_inject || man == NULL || man->GetProcessRecord(aProcesses[i])) // Skip when auto-inject is off or the process already has a record.
+ continue;
+ std::wstring process_path = GetProcessPath(aProcesses[i]);
+ if (!process_path.empty() && pfman->HasProfile(process_path))
+ {
+ UniqueHandle hThread(IthCreateThread(InjectThread, aProcesses[i]));
+ WaitForSingleObject(hThread.get(), 0); // NOTE(review): zero timeout — returns immediately without waiting for the injection to finish.
+ }
+ }
+ }
+ return 0;
+}
+
+DWORD SaveProcessProfile(DWORD pid) // Rebuilds the profile of `pid` from its current hooks and threads; always returns 0.
+{
+ const ProcessRecord* pr = man->GetProcessRecord(pid);
+ if (pr == NULL)
+ return 0;
+ std::wstring path = GetProcessPath(pid);
+ if (path.empty())
+ return 0;
+ Profile* pf = pfman->GetProfile(pid);
+ if (pf != NULL)
+ pf->Clear(); // NOTE(review): Clear() also blanks the existing title — confirm that is intended.
+ else
+ pf = pfman->AddProfile(path, pid);
+ AddHooksToProfile(*pf, *pr);
+ AddThreadsToProfile(*pf, *pr, pid);
+ return 0;
+}
+
+void AddHooksToProfile(Profile& pf, const ProcessRecord& pr) // Copies the process's additional, non-engine hooks into pf.
+{
+ WaitForSingleObject(pr.hookman_mutex, 0); // NOTE(review): zero timeout and unchecked result — the mutex may not actually be held below.
+ auto hooks = (const Hook*)pr.hookman_map;
+ for (DWORD i = 0; i < MAX_HOOK; ++i)
+ {
+ if (hooks[i].Address() == 0) // Address 0 marks an unused hook slot.
+ continue;
+ auto& hook = hooks[i];
+ DWORD type = hook.Type();
+ if ((type & HOOK_ADDITIONAL) && (type & HOOK_ENGINE) == 0) // Only hooks flagged ADDITIONAL and not ENGINE are saved.
+ {
+ std::unique_ptr<WCHAR[]> name(new WCHAR[hook.NameLength() * 2]()); // Zero-filled so the string is terminated even if the bytes read are not.
+ if (ReadProcessMemory(pr.process_handle, hook.Name(), name.get(), hook.NameLength() * 2, NULL))
+ {
+ if (hook.hp.module) // Hooks with a module set are stored relative to the allocation base.
+ {
+ HookParam hp = hook.hp;
+ MakeHookRelative(pr, hp);
+ pf.AddHook(hp, name.get());
+ }
+ else
+ pf.AddHook(hook.hp, name.get());
+ }
+ }
+ }
+ ReleaseMutex(pr.hookman_mutex);
+}
+
+void MakeHookRelative(const ProcessRecord& pr, HookParam& hp) // Rewrites hp.addr as an offset from its allocation base and clears hp.function.
+{
+ MEMORY_BASIC_INFORMATION info = {};
+ if (VirtualQueryEx(pr.process_handle, (LPCVOID)hp.addr, &info, sizeof(info)) != 0) // 0 means the query failed; leave addr untouched then.
+ hp.addr -= (DWORD)info.AllocationBase;
+ hp.function = 0;
+}
+
+void AddThreadsToProfile(Profile& pf, const ProcessRecord& pr, DWORD pid) // Saves the selected and linked text threads of `pid` into pf.
+{
+ man->LockHookman();
+ ThreadTable* table = man->Table();
+ for (int i = 0; i < table->Used(); ++i)
+ {
+ TextThread* tt = table->FindThread(i);
+ if (tt == NULL || tt->GetThreadParameter()->pid != pid) // Skip empty slots and threads of other processes.
+ continue;
+ //if (tt->Status() & CURRENT_SELECT || tt->Link() || tt->GetComment())
+ if (tt->Status() & CURRENT_SELECT || tt->Link()) // Only the selected thread and threads with a link are saved.
+ AddThreadToProfile(pf, pr, *tt);
+ }
+ man->UnlockHookman();
+}
+
+DWORD AddThreadToProfile(Profile& pf, const ProcessRecord& pr, TextThread& thread) // Adds `thread` (and, recursively, its link target) to pf; returns its index or -1.
+{
+ const ThreadParameter* tp = thread.GetThreadParameter();
+ std::wstring hook_name = GetHookNameByAddress(pr, tp->hook);
+ if (hook_name.empty()) // No name could be resolved for the hook address.
+ return -1;
+ auto thread_profile = new ThreadProfile(hook_name, tp->retn, tp->spl, 0, 0,
+ THREAD_MASK_RETN | THREAD_MASK_SPLIT, L"");
+ DWORD threads_size = pf.Threads().size();
+ int thread_profile_index = pf.AddThread(thread_ptr(thread_profile));
+ if (thread_profile_index == threads_size) // new thread
+ {
+ WORD iw = thread_profile_index & 0xFFFF;
+ if (thread.Status() & CURRENT_SELECT)
+ pf.SelectedIndex() = iw;
+ if (thread.Link())
+ {
+ WORD to_index = AddThreadToProfile(pf, pr, *(thread.Link())) & 0xFFFF; // A -1 result becomes 0xFFFF here.
+ if (iw >= 0) // NOTE(review): always true for a WORD; possibly meant to reject to_index == 0xFFFF — confirm.
+ pf.AddLink(link_ptr(new LinkProfile(iw, to_index)));
+ }
+ }
+ return thread_profile_index; // in case more than one thread links to the same thread.
+}
+
+std::wstring GetHookNameByAddress(const ProcessRecord& pr, DWORD hook_address) // Returns the name of the hook at hook_address, or an empty string.
+{
+ std::wstring hook_name;
+ WaitForSingleObject(pr.hookman_mutex, 0); // NOTE(review): zero timeout and unchecked result — the mutex may not actually be held below.
+ auto hooks = (const Hook*)pr.hookman_map;
+ for (int i = 0; i < MAX_HOOK; ++i)
+ {
+ auto& hook = hooks[i];
+ if (hook.Address() == hook_address) // Stop at the first hook with a matching address.
+ {
+ std::unique_ptr<WCHAR[]> name(new WCHAR[hook.NameLength() * 2]()); // Zero-filled so the string is terminated even if the bytes read are not.
+ if (ReadProcessMemory(pr.process_handle, hooks[i].Name(), name.get(), hook.NameLength() * 2, NULL))
+ hook_name = name.get();
+ break;
+ }
+ }
+ ReleaseMutex(pr.hookman_mutex);
+ return hook_name;
+}
diff --git a/gui/ProfileManager.h b/gui/ProfileManager.h
new file mode 100644
index 0000000..73d5ccb
--- /dev/null
+++ b/gui/ProfileManager.h
@@ -0,0 +1,34 @@
+#pragma once
+#include "ITH.h"
+#include "utility.h" // UniqueHandle, CriticalSection
+
+class Profile;
+
+class ProfileManager // Owns all game profiles, keyed by executable path, and the process monitor thread.
+{
+public:
+ ProfileManager(); // Starts the monitor thread and loads ITH_Profile.xml.
+ ~ProfileManager(); // Saves ITH_Profile.xml.
+ Profile* AddProfile(const std::wstring& path, DWORD pid); // Returns the existing profile or creates an empty one.
+ void DeleteProfile(const std::wstring& path);
+ void LoadProfile();
+ void SaveProfile();
+ void FindProfileAndUpdateHookAddresses(DWORD pid, const std::wstring& path);
+ bool HasProfile(const std::wstring& path);
+ Profile* GetProfile(DWORD pid); // NULL when the process has no profile.
+ DWORD ProfileCount();
+private:
+ typedef std::unique_ptr<Profile> profile_ptr;
+ typedef std::map<std::wstring, profile_ptr> profile_map;
+
+ ProfileManager(const ProfileManager&); // Declared private: copying is not supported.
+ ProfileManager& operator=(const ProfileManager&);
+
+ bool AddProfile(pugi::xml_node game);
+ Profile* AddProfile(const std::wstring& path, profile_ptr new_profile);
+ void WriteProfileXml(const std::wstring& path, Profile& pf, pugi::xml_node doc);
+ // locate profile with executable path
+ profile_map profile_tree;
+ CriticalSection cs; // Taken by the AddProfile(path, ...) overloads and DeleteProfile.
+ UniqueHandle hMonitorThread;
+};
diff --git a/gui/TextBuffer.cpp b/gui/TextBuffer.cpp
new file mode 100644
index 0000000..2620743
--- /dev/null
+++ b/gui/TextBuffer.cpp
@@ -0,0 +1,41 @@
+#include "TextBuffer.h"
+
+DWORD WINAPI FlushThread(LPVOID lParam); // window.cpp
+
+// Creates the buffer for the edit control `edit` and starts FlushThread with
+// this object as its argument.
+//
+// Members are initialized in their declaration order (cs, line_break, running,
+// hThread, hEdit, str), so the initializer list is written in that order.
+// line_break is initialized here because Flush() reads it; it used to stay
+// indeterminate until the first AddText() or ClearBuffer() call.
+//
+// NOTE(review): hThread is declared before hEdit and str, so the thread is
+// created before those members are initialized - confirm FlushThread cannot
+// touch them that early.
+TextBuffer::TextBuffer(HWND edit) :
+	line_break(false),
+	running(true),
+	hThread(IthCreateThread(FlushThread, (DWORD)this)),
+	hEdit(edit)
+{
+}
+
+// Clears the running flag (reported by Running()) and polls the thread handle.
+// NOTE(review): the wait uses a timeout of 0, so it returns immediately and
+// does not guarantee that the thread has exited before the members are
+// destroyed - confirm this is intentional before changing it.
+TextBuffer::~TextBuffer()
+{
+ running = false;
+ WaitForSingleObject(hThread.get(), 0);
+}
+
+// Appends the first `len` characters of `str` to the pending text (nothing is
+// appended when len <= 0) and records `line` as the new line_break state.
+// The whole operation runs under the buffer's critical section.
+void TextBuffer::AddText(LPCWSTR str, int len, bool line)
+{
+	CSLock guard(cs);
+	const bool has_text = len > 0;
+	if (has_text)
+	{
+		this->str.append(str, len);
+	}
+	line_break = line;
+}
+
+// Moves the pending text to the end of the edit control and empties the
+// buffer. Nothing happens while line_break is set or when there is no pending
+// text. Runs under the buffer's critical section.
+void TextBuffer::Flush()
+{
+	CSLock guard(cs);
+	if (!line_break && !str.empty())
+	{
+		// Put the caret at the end of the existing text, then insert there.
+		const DWORD text_end = Edit_GetTextLength(hEdit);
+		Edit_SetSel(hEdit, text_end, -1);
+		Edit_ReplaceSel(hEdit, str.c_str());
+		str.clear();
+	}
+}
+
+// Drops any pending text and resets the line_break flag, under the buffer's
+// critical section.
+void TextBuffer::ClearBuffer()
+{
+	CSLock guard(cs);
+	line_break = false;
+	str.clear();
+}
diff --git a/gui/TextBuffer.h b/gui/TextBuffer.h
new file mode 100644
index 0000000..b5ec063
--- /dev/null
+++ b/gui/TextBuffer.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "ITH.h"
+#include "utility.h" // UniqueHandle, CriticalSection
+
+// Accumulates text (AddText) and writes it to an edit control (Flush); a
+// thread handle is held in hThread. See TextBuffer.cpp for the definitions.
+class TextBuffer
+{
+public:
+ TextBuffer(HWND edit);
+ ~TextBuffer();
+ void Flush();
+ void AddText(LPCWSTR str, int len, bool line);
+ void ClearBuffer();
+ // Returns the running flag; the destructor sets it to false.
+ bool Running() { return running; }
+private:
+ // Members are initialized in the order declared here, regardless of the
+ // order used in the constructor's initializer list.
+ CriticalSection cs;
+ bool line_break, running;
+ UniqueHandle hThread;
+ HWND hEdit;
+ std::wstring str;
+};
diff --git a/gui/language.cpp b/gui/language.cpp
new file mode 100644
index 0000000..6149eb9
--- /dev/null
+++ b/gui/language.cpp
@@ -0,0 +1,132 @@
+/* Copyright (C) 2010-2012 kaosu (qiupf2000@gmail.com)
+ * This file is part of the Interactive Text Hooker.
+
+ * Interactive Text Hooker is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+// Caption/label text "Warning!".
+const wchar_t* Warning=L"Warning!";
+//command.cpp
+const wchar_t* ErrorSyntax=L"Syntax error";
+// Short help text listing the ':' commands, the /P loader option and the /H
+// hook option. The sentence about 'from' and 'to' previously read
+// "and hexadecimal"; it now reads "are hexadecimal".
+const wchar_t* Usage = L"Syntax:\r\n\
+\r\n\
+:H[ELP] - print help\r\n\
+:Lfrom-to - link from thread 'from' to thread 'to'\r\n\
+:Ufrom - unlink link from thread 'from'\r\n\
+\r\n\
+'from' and 'to' are hexadecimal thread numbers. The thread number is the first number in the combo box.\r\n\
+\r\n\
+Loader options:\r\n\
+/P[{process_id|Nprocess_name}] - attach to process\r\n\
+\r\n\
+Hook options:\r\n\
+/H[X]{A|B|W|S|Q}[N][data_offset[*drdo]][:sub_offset[*drso]]@addr[:module[:{name|#ordinal}]]\r\n\
+\r\n\
+All numbers in /H (except ordinal) are hexadecimal without any prefixes";
+
+// Detailed help text for the /H hook option: the hook type letters, every
+// parameter, and the register encoding used for negative offsets.
+const wchar_t* ExtendedUsage = L"/H[X]{A|B|W|S|Q}[N][data_offset[*drdo]][:sub_offset[*drso]]@addr[:[module[:{name|#ordinal}]]]\r\n\
+\r\n\
+Set additional custom hook\r\n\
+\r\n\
+Hook types :\r\n\
+A - DBCS char\r\n\
+B - DBCS char(big-endian)\r\n\
+W - UCS2 char\r\n\
+S - MBCS string\r\n\
+Q - UTF-16 string\r\n\
+\r\n\
+Parameters:\r\n\
+X - use hardware breakpoints\r\n\
+N - don't use contexts\r\n\
+data_offset - stack offset to char / string pointer\r\n\
+drdo - add a level of indirection to data_offset\r\n\
+sub_offset - stack offset to subcontext\r\n\
+drso - add a level of indirection to sub_offset\r\n\
+addr - address of the hook\r\n\
+module - name of the module to use as base for 'addr'\r\n\
+name - name of the 'module' export to use as base for 'addr'\r\n\
+ordinal - number of the 'module' export ordinal to use as base for 'addr'\r\n\
+\r\n\
+Negative values of 'data_offset' and 'sub_offset' refer to registers: \r\n\
+- 4 for EAX, -8 for ECX, -C for EDX, -10 for EBX, -14 for ESP, -18 for EBP, -1C for ESI, -20 for EDI\r\n\
+\r\n\
+\"Add a level of indirection\" means in C/C++ style: (*(ESP+data_offset)+drdo) instead of (ESP+data_offset)\r\n\
+\r\n\
+All numbers except ordinal are hexadecimal without any prefixes";
+
+//inject.cpp
+// Messages about attaching to / injecting into a process.
+const wchar_t* ErrorRemoteThread=L"Can't create remote thread.";
+const wchar_t* ErrorOpenProcess=L"Can't open process.";
+const wchar_t* ErrorNoProcess=L"Process not found";
+const wchar_t* SelfAttach=L"Please do not attach to ITH.exe";
+const wchar_t* AlreadyAttach=L"Process already attached.";
+// printf-style format taking two arguments: %d, then %.8X.
+const wchar_t* FormatInject=L"Inject process %d. Module base %.8X";
+//main.cpp
+// Warning text for a failure to enable the debug privilege. The privilege is
+// called SeDebugPrivilege; the message previously misspelled it.
+const wchar_t* NotAdmin=L"Can't enable SeDebugPrivilege. ITH might malfunction.\r\n\
+Please run ITH as administrator or turn off UAC.";
+//pipe.cpp
+const wchar_t* ErrorCreatePipe=L"Can't create text pipe or too many instance.";
+// printf-style format taking one %d argument.
+const wchar_t* FormatDetach=L"Process %d detached.";
+const wchar_t* ErrorCmdQueueFull=L"Command queue full.";
+const wchar_t* ErrorNoAttach=L"No process attached.";
+
+//profile.cpp
+const wchar_t* ErrorMonitor=L"Can't monitor process.";
+//utility.cpp
+// Multi-line text with copyright lines and headings.
+// NOTE(review): the text after "kaosu", "zorkzero", "Source code" and
+// "General discussion" looks truncated in this copy - confirm against the
+// original file.
+const wchar_t* InitMessage=L"Copyright (C) 2010-2012 kaosu \r\n\
+Copyright (C) 2015 zorkzero \r\n\
+Source code \r\n\
+General discussion ";
+const wchar_t* BackgroundMsg=L"Type \":h\" or \":help\" for help.";
+const wchar_t* ErrorLinkExist=L"Link exist.";
+const wchar_t* ErrorCylicLink=L"Link failed. No cyclic link allowed.";
+// printf-style format taking two %.4x arguments.
+const wchar_t* FormatLink=L"Link from thread%.4x to thread%.4x.";
+const wchar_t* ErrorLink=L"Link failed. Source or/and destination thread not found.";
+const wchar_t* ErrorDeleteCombo=L"Error delete from combo.";
+
+//window.cpp
+const wchar_t* ClassName=L"ITH";
+const wchar_t* ClassNameAdmin=L"ITH (Administrator)";
+const wchar_t* ErrorNotSplit=L"Need to enable split first!";
+const wchar_t* ErrorNotModule=L"Need to enable module first!";
+//Main window buttons
+const wchar_t* ButtonTitleProcess=L"Process";
+const wchar_t* ButtonTitleThread=L"Thread";
+const wchar_t* ButtonTitleHook=L"Hook";
+const wchar_t* ButtonTitleProfile=L"Profile";
+const wchar_t* ButtonTitleOption=L"Option";
+const wchar_t* ButtonTitleClear=L"Clear";
+const wchar_t* ButtonTitleSave=L"Save";
+const wchar_t* ButtonTitleTop=L"Top";
+//Hook window
+const wchar_t* SpecialHook=L"Special hook, no AGTH equivalent.";
+//Process window
+const wchar_t* TabTitlePID=L"PID";
+const wchar_t* TabTitleMemory=L"Memory";
+const wchar_t* TabTitleName=L"Name";
+const wchar_t* TabTitleTID=L"TID";
+const wchar_t* TabTitleStart=L"Start";
+const wchar_t* TabTitleModule=L"Module";
+const wchar_t* TabTitleState=L"State";
+const wchar_t* SuccessAttach=L"Attach ITH to process successfully.";
+const wchar_t* FailAttach=L"Failed to attach ITH to process.";
+const wchar_t* SuccessDetach=L"ITH detach from process.";
+const wchar_t* FailDetach=L"Detach failed.";
+//Profile window
+const wchar_t* ProfileExist=L"Profile already exists.";
+const wchar_t* SuccessAddProfile=L"Profile added.";
+const wchar_t* FailAddProfile=L"Fail to add profile";
+const wchar_t* TabTitleNumber=L"No.";
+const wchar_t* NoFile=L"Can't find file.";
+const wchar_t* PathDismatch=L"Process name dismatch, continue?";
+const wchar_t* SuccessImportProfile=L"Import profile success";
+// Commented-out duplicate of the SuccessAddProfile definition above.
+//const wchar_t* SuccessAddProfile=L"Profile added.";
\ No newline at end of file
diff --git a/gui/language.h b/gui/language.h
new file mode 100644
index 0000000..141bf40
--- /dev/null
+++ b/gui/language.h
@@ -0,0 +1,86 @@
+/* Copyright (C) 2010-2012 kaosu (qiupf2000@gmail.com)
+ * This file is part of the Interactive Text Hooker.
+
+ * Interactive Text Hooker is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+// Declarations of the user-interface strings defined in language.cpp.
+// The section comments name the source file or window each group belongs to.
+extern const wchar_t* Warning;
+//command.cpp
+extern const wchar_t* ErrorSyntax;
+extern const wchar_t* Usage;
+extern const wchar_t* ExtendedUsage;
+//inject.cpp
+extern const wchar_t* ErrorRemoteThread;
+extern const wchar_t* ErrorOpenProcess;
+extern const wchar_t* ErrorNoProcess;
+extern const wchar_t* SelfAttach;
+extern const wchar_t* AlreadyAttach;
+extern const wchar_t* FormatInject;
+//main.cpp
+extern const wchar_t* NotAdmin;
+//pipe.cpp
+extern const wchar_t* ErrorCreatePipe;
+extern const wchar_t* FormatDetach;
+extern const wchar_t* ErrorCmdQueueFull;
+extern const wchar_t* ErrorNoAttach;
+
+//profile.cpp
+extern const wchar_t* ErrorMonitor;
+
+//utility.cpp
+extern const wchar_t* InitMessage;
+extern const wchar_t* BackgroundMsg;
+extern const wchar_t* ErrorLinkExist;
+extern const wchar_t* ErrorCylicLink;
+extern const wchar_t* FormatLink;
+extern const wchar_t* ErrorLink;
+extern const wchar_t* ErrorDeleteCombo;
+
+//window.cpp
+extern const wchar_t* ClassName;
+extern const wchar_t* ClassNameAdmin;
+extern const wchar_t* ErrorNotSplit;
+extern const wchar_t* ErrorNotModule;
+//Main window buttons
+extern const wchar_t* ButtonTitleProcess;
+extern const wchar_t* ButtonTitleThread;
+extern const wchar_t* ButtonTitleHook;
+extern const wchar_t* ButtonTitleProfile;
+extern const wchar_t* ButtonTitleOption;
+extern const wchar_t* ButtonTitleClear;
+extern const wchar_t* ButtonTitleSave;
+extern const wchar_t* ButtonTitleTop;
+//Hook window
+extern const wchar_t* SpecialHook;
+//Process window
+extern const wchar_t* TabTitlePID;
+extern const wchar_t* TabTitleMemory;
+extern const wchar_t* TabTitleName;
+extern const wchar_t* TabTitleTID;
+extern const wchar_t* TabTitleStart;
+extern const wchar_t* TabTitleModule;
+extern const wchar_t* TabTitleState;
+extern const wchar_t* SuccessAttach;
+extern const wchar_t* FailAttach;
+extern const wchar_t* SuccessDetach;
+extern const wchar_t* FailDetach;
+//Profile window
+extern const wchar_t* ProfileExist;
+extern const wchar_t* SuccessAddProfile;
+extern const wchar_t* FailAddProfile;
+extern const wchar_t* TabTitleNumber;
+extern const wchar_t* NoFile;
+extern const wchar_t* PathDismatch;
+extern const wchar_t* SuccessImportProfile;
\ No newline at end of file
diff --git a/gui/main.cpp b/gui/main.cpp
new file mode 100644
index 0000000..5e4ff57
--- /dev/null
+++ b/gui/main.cpp
@@ -0,0 +1,283 @@
+/* Copyright (C) 2010-2012 kaosu (qiupf2000@gmail.com)
+ * This file is part of the Interactive Text Hooker.
+
+ * Interactive Text Hooker is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "ITH.h"
+#include "ith/host/srv.h"
+#include "ith/host/hookman.h"
+#include "ith/host/SettingManager.h"
+#include "CustomFilter.h"
+#include "profile.h"
+#include "ProfileManager.h"
+
+// Application instance handle; assigned in WinMain.
+HINSTANCE hIns;
+ATOM MyRegisterClass(HINSTANCE hInstance);
+BOOL InitInstance(HINSTANCE hInstance, DWORD nCmdShow, RECT *rc);
+// Main window rectangle: filled from the settings by InitializeSettings and
+// refreshed from the live window by SaveSettings.
+RECT window;
+extern HWND hMainWnd; // windows.cpp
+extern bool MonitorFlag; // ProfileManager.cpp
+extern ProfileManager* pfman; // ProfileManager.cpp
+
+extern "C" {
+ BOOL IthInitSystemService();
+ void IthCloseSystemService();
+}
+
+// Single-character filters; created in WinMain, filled by LoadSettings and
+// written out by SaveSettings.
+CustomFilter* uni_filter;
+CustomFilter* mb_filter;
+// Obtained in WinMain through IHF_GetHookManager / IHF_GetSettingManager.
+HookManager* man;
+SettingManager* setman;
+// Working copies of the values stored in `setting` (see InitializeSettings).
+LONG split_time, cyclic_remove, global_filter;
+LONG process_time, inject_delay, insert_delay,
+ auto_inject, auto_insert, clipboard_flag;
+
+// Named settings, keyed by the attribute names used in ITH.xml.
+// NOTE(review): the template arguments are not visible in this copy of the file.
+std::map setting;
+
+// Traverse callback: appends an attribute named "m" followed by the 4-digit
+// upper-case hex value of `mb`, with value "0", to the XML node `f` points to.
+//
+// mb - character code to record.
+// f  - pointer to the pugi::xml_node receiving the attribute.
+void RecordMBChar(WORD mb, PVOID f)
+{
+	auto filter = (pugi::xml_node*)f;
+	DWORD m = mb;
+	WCHAR buffer[16];
+	// Sized swprintf: the write is bounded by the buffer length.
+	std::swprintf(buffer, sizeof(buffer) / sizeof(buffer[0]), L"m%04X", m);
+	filter->append_attribute(buffer) = L"0";
+}
+
+// Traverse callback: appends an attribute named "u" followed by the 4-digit
+// upper-case hex value of `uni`, with value "0", to the XML node `f` points
+// to, and also appends the character itself to that node's text.
+//
+// uni - character code to record.
+// f   - pointer to the pugi::xml_node receiving the attribute and text.
+void RecordUniChar(WORD uni, PVOID f)
+{
+	auto filter = (pugi::xml_node*)f;
+	DWORD m = uni;
+	WCHAR buffer[16];
+	// Sized swprintf: the write is bounded by the buffer length.
+	std::swprintf(buffer, sizeof(buffer) / sizeof(buffer[0]), L"u%04X", m);
+	filter->append_attribute(buffer) = L"0";
+	std::wstring text = filter->text().get();
+	text += (wchar_t)m;
+	filter->text().set(text.c_str());
+}
+
+// Copies the current window rectangle and option values into `setting`, then
+// writes them to ITH.xml as attributes of an <ITH_Setting> element, followed
+// by a <SingleCharFilter> child filled from both character filters.
+// Nothing is written when the file cannot be created.
+void SaveSettings()
+{
+	GetWindowRect(hMainWnd, &window);
+
+	// Option values.
+	setting[L"split_time"] = split_time;
+	setting[L"process_time"] = process_time;
+	setting[L"inject_delay"] = inject_delay;
+	setting[L"insert_delay"] = insert_delay;
+	setting[L"auto_inject"] = auto_inject;
+	setting[L"auto_insert"] = auto_insert;
+	setting[L"auto_copy"] = clipboard_flag;
+	setting[L"auto_suppress"] = cyclic_remove;
+	setting[L"global_filter"] = global_filter;
+	// Window geometry.
+	setting[L"window_left"] = window.left;
+	setting[L"window_right"] = window.right;
+	setting[L"window_top"] = window.top;
+	setting[L"window_bottom"] = window.bottom;
+
+	UniqueHandle hFile(IthCreateFile(L"ITH.xml", GENERIC_WRITE, FILE_SHARE_READ, CREATE_ALWAYS));
+	if (hFile.get() == INVALID_HANDLE_VALUE)
+		return;
+
+	FileWriter fw(hFile.get());
+	pugi::xml_document doc;
+	auto root = doc.root().append_child(L"ITH_Setting");
+	for (const auto& entry : setting)
+	{
+		auto attribute = root.append_attribute(entry.first.c_str());
+		attribute.set_value(entry.second);
+	}
+	auto filter = root.append_child(L"SingleCharFilter");
+	filter.append_child(pugi::xml_node_type::node_pcdata);
+	mb_filter->Traverse(RecordMBChar, &filter);
+	uni_filter->Traverse(RecordUniChar, &filter);
+	doc.save(fw);
+}
+
+// Fills `setting` with the built-in default for every known key.
+void DefaultSettings()
+{
+	static const struct { const wchar_t* key; int value; } defaults[] =
+	{
+		{ L"split_time", 200 },
+		{ L"process_time", 50 },
+		{ L"inject_delay", 3000 },
+		{ L"insert_delay", 500 },
+		{ L"auto_inject", 1 },
+		{ L"auto_insert", 1 },
+		{ L"auto_copy", 0 },
+		{ L"auto_suppress", 0 },
+		{ L"global_filter", 0 },
+		{ L"window_left", 100 },
+		{ L"window_right", 800 },
+		{ L"window_top", 100 },
+		{ L"window_bottom", 600 },
+	};
+	for (size_t i = 0; i < sizeof(defaults) / sizeof(defaults[0]); ++i)
+		setting[defaults[i].key] = defaults[i].value;
+}
+
+// Copies the values held in `setting` into the global option variables and
+// the `window` rectangle, then normalizes them: the four on/off options are
+// capped at 1 and the window is made at least 600 wide and 200 high.
+void InitializeSettings()
+{
+	window.left = setting[L"window_left"];
+	window.right = setting[L"window_right"];
+	window.top = setting[L"window_top"];
+	window.bottom = setting[L"window_bottom"];
+	split_time = setting[L"split_time"];
+	process_time = setting[L"process_time"];
+	inject_delay = setting[L"inject_delay"];
+	insert_delay = setting[L"insert_delay"];
+	auto_inject = setting[L"auto_inject"];
+	auto_insert = setting[L"auto_insert"];
+	clipboard_flag = setting[L"auto_copy"];
+	cyclic_remove = setting[L"auto_suppress"];
+	global_filter = setting[L"global_filter"];
+
+	// Values above 1 are treated as 1.
+	auto_inject = (auto_inject > 1) ? 1 : auto_inject;
+	auto_insert = (auto_insert > 1) ? 1 : auto_insert;
+	clipboard_flag = (clipboard_flag > 1) ? 1 : clipboard_flag;
+	cyclic_remove = (cyclic_remove > 1) ? 1 : cyclic_remove;
+
+	// Enforce the minimum window size, keeping the top-left corner.
+	const bool too_narrow = window.right < window.left || window.right - window.left < 600;
+	if (too_narrow)
+		window.right = window.left + 600;
+	const bool too_short = window.bottom < window.top || window.bottom - window.top < 200;
+	if (too_short)
+		window.bottom = window.top + 200;
+}
+
+// Reads ITH.xml, if present, and applies it:
+//  - attributes of <ITH_Setting> whose names already exist in `setting`
+//    overwrite the stored value;
+//  - attributes of <SingleCharFilter> named "m<hex>" / "u<hex>" are inserted
+//    into mb_filter / uni_filter;
+//  - every character of the <SingleCharFilter> text is inserted into
+//    uni_filter and, after conversion with WC_MB, into mb_filter.
+//
+// A missing, unreadable or unparsable file leaves the settings untouched.
+// Malformed numbers are skipped: std::stoul, used previously, threw on them.
+// An out-of-range decimal now saturates to ULONG_MAX instead of throwing.
+void LoadSettings()
+{
+	UniqueHandle hFile(IthCreateFile(L"ITH.xml", GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING));
+	if (hFile.get() == INVALID_HANDLE_VALUE)
+		return;
+
+	DWORD size = GetFileSize(hFile.get(), NULL);
+	if (size == INVALID_FILE_SIZE || size == 0)
+		return; // Size query failed, or nothing to parse.
+	std::unique_ptr<char[]> buffer(new char[size]);
+	if (!ReadFile(hFile.get(), buffer.get(), size, &size, NULL))
+		return;
+
+	// The document parses the buffer in place, so it is declared after the
+	// buffer and is destroyed before it.
+	pugi::xml_document doc;
+	auto result = doc.load_buffer_inplace(buffer.get(), size);
+	if (!result)
+		return;
+
+	auto root = doc.root().child(L"ITH_Setting");
+	for (auto attr = root.attributes_begin(); attr != root.attributes_end(); ++attr)
+	{
+		auto it = setting.find(attr->name());
+		if (it == setting.end())
+			continue;
+		const wchar_t* text = attr->value();
+		wchar_t* end = NULL;
+		const unsigned long value = std::wcstoul(text, &end, 10);
+		if (end != text) // At least one digit was parsed.
+			it->second = value;
+	}
+
+	auto filter = root.child(L"SingleCharFilter");
+	if (!filter)
+		return;
+
+	for (auto attr = filter.attributes_begin(); attr != filter.attributes_end(); ++attr)
+	{
+		const wchar_t* name = attr->name();
+		const bool is_mb = name[0] == L'm';
+		if (!is_mb && name[0] != L'u')
+			continue;
+		const wchar_t* digits = name + 1;
+		wchar_t* end = NULL;
+		const DWORD c = std::wcstoul(digits, &end, 16);
+		if (end == digits)
+			continue; // No hex digits after the prefix.
+		if (is_mb)
+			mb_filter->Insert(c & 0xFFFF);
+		else
+			uni_filter->Insert(c & 0xFFFF);
+	}
+
+	std::wstring filter_value = filter.text().get();
+	for (auto it = filter_value.begin(); it != filter_value.end(); ++it)
+	{
+		WCHAR filter_unichar[2] = { *it, L'\0' };
+		// Zero-filled so a failed conversion yields 0 rather than stack garbage.
+		char filter_mbchar[4] = { 0 };
+		WC_MB(filter_unichar, filter_mbchar, 4);
+		mb_filter->Insert(*(WORD*)filter_mbchar);
+		uni_filter->Insert(*it);
+	}
+}
+
+extern LPCWSTR ClassName, ClassNameAdmin;
+static WCHAR mutex[] = L"ITH_RUNNING";
+// Looks for an existing window of class ClassName titled ClassName or
+// ClassNameAdmin. When one is found it is shown and brought to the
+// foreground, and 0 is returned; otherwise 1 is returned.
+DWORD FindITH()
+{
+	HWND existing = FindWindow(ClassName, ClassName);
+	if (existing == NULL)
+		existing = FindWindow(ClassName, ClassNameAdmin);
+	if (!existing)
+		return 1;
+	ShowWindow(existing, SW_SHOWNORMAL);
+	SetForegroundWindow(existing);
+	return 0;
+}
+// Unhandled-exception filter (installed in WinMain). Shows a message box
+// whose caption is the exception code in hex and whose text is either
+// "<module file name>:<offset from the module's allocation base>" or, when
+// the module cannot be resolved, the raw exception address. The process is
+// then terminated.
+//
+// The module text is formatted into its own buffer: it used to be written
+// into path_name while being read from one character further on, which
+// swprintf does not permit (overlapping buffers). All swprintf calls are now
+// bounded by the destination size.
+LONG WINAPI UnhandledExcept(_EXCEPTION_POINTERS *ExceptionInfo)
+{
+	wchar_t path_name[512]; // fully qualified path name
+	WCHAR code[16];
+	EXCEPTION_RECORD* rec = ExceptionInfo->ExceptionRecord;
+	std::swprintf(code, sizeof(code) / sizeof(code[0]), L"%08X", rec->ExceptionCode);
+	MEMORY_BASIC_INFORMATION info;
+	if (VirtualQuery(rec->ExceptionAddress, &info, sizeof(info)))
+	{
+		if (GetModuleFileName((HMODULE)info.AllocationBase, path_name, 512))
+		{
+			LPWSTR name = wcsrchr(path_name, L'\\');
+			if (name)
+			{
+				DWORD addr = (DWORD)rec->ExceptionAddress;
+				// Room for the file name plus ":XXXXXXXX" and the terminator.
+				WCHAR message[512 + 16];
+				std::swprintf(message, sizeof(message) / sizeof(message[0]), L"%s:%08X", name + 1, addr - (DWORD)info.AllocationBase);
+				MessageBox(NULL, message, code, MB_OK);
+				TerminateProcess(GetCurrentProcess(), 0);
+			}
+		}
+	}
+	// Fallback: report the bare address (cast explicitly to match %08X).
+	std::swprintf(path_name, sizeof(path_name) / sizeof(path_name[0]), L"%08X", (DWORD)rec->ExceptionAddress);
+	MessageBox(NULL, path_name, code, MB_OK);
+	TerminateProcess(GetCurrentProcess(), 0);
+	return 0;
+}
+
+// Program entry point.
+// Initializes the system service layer and the hook framework (IHF_Init),
+// creates the global managers and filters, loads the settings, creates the
+// main window and runs the message loop. When IHF_Init fails, FindITH() is
+// called instead to bring an already running instance to the foreground.
+// On every path the framework is cleaned up and the process is terminated.
+//
+// An explicit return value is supplied: the function is declared to return
+// int, and flowing off the end of a non-void function is undefined behaviour.
+int WINAPI WinMain (HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow)
+{
+	if (!IthInitSystemService())
+		TerminateProcess(GetCurrentProcess(), 0);
+	// The handle is never closed, so the mutex exists until the process ends.
+	CreateMutex(NULL, TRUE, L"ITH_MAIN_RUNNING");
+	if (IHF_Init())
+	{
+		SetUnhandledExceptionFilter(UnhandledExcept);
+		IHF_GetHookManager(&man);
+		IHF_GetSettingManager(&setman);
+		setman->SetValue(SETTING_SPLIT_TIME, 200);
+		MonitorFlag = true;
+		pfman = new ProfileManager();
+		mb_filter = new CustomFilter();
+		uni_filter = new CustomFilter();
+		// Defaults first, then the values from ITH.xml, then the copies into
+		// the global option variables.
+		DefaultSettings();
+		LoadSettings();
+		InitializeSettings();
+		setman->SetValue(SETTING_SPLIT_TIME, split_time);
+		setman->SetValue(SETTING_CLIPFLAG, clipboard_flag);
+		hIns = hInstance;
+		MyRegisterClass(hIns);
+		InitInstance(hIns, IHF_IsAdmin(), &window);
+		MSG msg;
+		while (GetMessage(&msg, NULL, 0, 0))
+		{
+			TranslateMessage(&msg);
+			DispatchMessage(&msg);
+		}
+		//delete mb_filter;
+		//delete uni_filter;
+		delete pfman;
+		MonitorFlag = false;
+		man = NULL;
+	}
+	else
+	{
+		FindITH();
+	}
+	IHF_Cleanup();
+	IthCloseSystemService();
+	TerminateProcess(GetCurrentProcess(), 0);
+	return 0;
+}
diff --git a/gui/pugixml.cpp b/gui/pugixml.cpp
new file mode 100644
index 0000000..dd3f427
--- /dev/null
+++ b/gui/pugixml.cpp
@@ -0,0 +1,11484 @@
+/**
+ * pugixml parser - version 1.5
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2014, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
+ */
+
+#ifndef SOURCE_PUGIXML_CPP
+#define SOURCE_PUGIXML_CPP
+
+#include "pugixml.hpp"
+
+#include
+#include
+#include
+#include
+
+#ifdef PUGIXML_WCHAR_MODE
+# include
+#endif
+
+#ifndef PUGIXML_NO_XPATH
+# include
+# include
+# ifdef PUGIXML_NO_EXCEPTIONS
+# include
+# endif
+#endif
+
+#ifndef PUGIXML_NO_STL
+# include
+# include
+# include
+#endif
+
+// For placement new
+#include
+
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable: 4127) // conditional expression is constant
+# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+# pragma warning(disable: 4702) // unreachable code
+# pragma warning(disable: 4996) // this function or variable may be unsafe
+# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
+#endif
+
+#ifdef __INTEL_COMPILER
+# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 279) // controlling expression is constant
+# pragma warning(disable: 1478 1786) // function was declared "deprecated"
+# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
+#endif
+
+#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
+# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
+#endif
+
+#ifdef __BORLANDC__
+# pragma option push
+# pragma warn -8008 // condition is always false
+# pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
+# pragma diag_suppress=178 // function was declared but never referenced
+# pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// Inlining controls
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGI__NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+# define PUGI__NO_INLINE __attribute__((noinline))
+#else
+# define PUGI__NO_INLINE
+#endif
+
+// Branch weight controls
+#if defined(__GNUC__)
+# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
+#else
+# define PUGI__UNLIKELY(cond) (cond)
+#endif
+
+// Simple static assertion
+#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+# define PUGI__DMC_VOLATILE volatile
+#else
+# define PUGI__DMC_VOLATILE
+#endif
+
+// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
+#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
+using std::memcpy;
+using std::memmove;
+#endif
+
+// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
+#if defined(_MSC_VER) && !defined(__S3E__)
+# define PUGI__MSVC_CRT_VERSION _MSC_VER
+#endif
+
+#ifdef PUGIXML_HEADER_ONLY
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# define PUGI__FN inline
+# define PUGI__FN_NO_INLINE inline
+#else
+# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
+# define PUGI__NS_BEGIN namespace pugi { namespace impl {
+# define PUGI__NS_END } }
+# else
+# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
+# define PUGI__NS_END } } }
+# endif
+# define PUGI__FN
+# define PUGI__FN_NO_INLINE PUGI__NO_INLINE
+#endif
+
+// uintptr_t
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+# include
+#else
+# ifndef _UINTPTR_T_DEFINED
+// No native uintptr_t in MSVC6 and in some WinCE versions
+typedef size_t uintptr_t;
+#define _UINTPTR_T_DEFINED
+# endif
+PUGI__NS_BEGIN
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int16 uint16_t;
+ typedef unsigned __int32 uint32_t;
+PUGI__NS_END
+#endif
+
+// Memory allocation
+PUGI__NS_BEGIN
+ // Default allocation function: forwards to malloc.
+ PUGI__FN void* default_allocate(size_t size)
+ {
+ return malloc(size);
+ }
+
+ // Default deallocation function: forwards to free.
+ PUGI__FN void default_deallocate(void* ptr)
+ {
+ free(ptr);
+ }
+
+ // Holder for the current allocation/deallocation function pointers.
+ // NOTE(review): the template parameter lists on the lines below are not
+ // visible in this copy of the file.
+ template
+ struct xml_memory_management_function_storage
+ {
+ static allocation_function allocate;
+ static deallocation_function deallocate;
+ };
+
+ // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
+ // Without a template<> we'll get multiple definitions of the same static
+ // Both statics start out pointing at the malloc/free based defaults above.
+ template allocation_function xml_memory_management_function_storage::allocate = default_allocate;
+ template deallocation_function xml_memory_management_function_storage::deallocate = default_deallocate;
+
+ typedef xml_memory_management_function_storage xml_memory;
+PUGI__NS_END
+
+// String utilities
+PUGI__NS_BEGIN
+ // Length of a null-terminated char_t string, in characters.
+ // Uses wcslen in wide-character builds and strlen otherwise.
+ PUGI__FN size_t strlength(const char_t* s)
+ {
+ 	assert(s);
+
+ #ifndef PUGIXML_WCHAR_MODE
+ 	return strlen(s);
+ #else
+ 	return wcslen(s);
+ #endif
+ }
+
+ // True when the two null-terminated char_t strings are identical.
+ // Uses wcscmp in wide-character builds and strcmp otherwise.
+ PUGI__FN bool strequal(const char_t* src, const char_t* dst)
+ {
+ 	assert(src && dst);
+
+ #ifndef PUGIXML_WCHAR_MODE
+ 	return strcmp(src, dst) == 0;
+ #else
+ 	return wcscmp(src, dst) == 0;
+ #endif
+ }
+
+ // True when the null-terminated string lhs equals the `count` characters
+ // starting at rhs, i.e. the first `count` characters match and lhs ends
+ // right after them.
+ PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+ {
+ 	size_t index = 0;
+
+ 	while (index < count)
+ 	{
+ 		if (lhs[index] != rhs[index])
+ 			return false;
+ 		++index;
+ 	}
+
+ 	return lhs[count] == 0;
+ }
+
+ // Get length of wide string, even if CRT lacks wide character support
+ // Wide-character builds call wcslen; other builds count characters by hand
+ // up to the terminating zero.
+ PUGI__FN size_t strlength_wide(const wchar_t* s)
+ {
+ assert(s);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcslen(s);
+ #else
+ const wchar_t* end = s;
+ while (*end) end++;
+ return static_cast(end - s);
+ #endif
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Copies the null-terminated string `source` into `dest` one character at a
+ // time, converting each char to wchar_t, and terminates `dest`. The caller
+ // guarantees the input is ASCII and that `dest` is large enough.
+ PUGI__FN void widen_ascii(wchar_t* dest, const char* source)
+ {
+ 	while (*source)
+ 	{
+ 		*dest = *source;
+ 		++dest;
+ 		++source;
+ 	}
+ 	*dest = 0;
+ }
+#endif
+PUGI__NS_END
+
+#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
+// auto_ptr-like buffer holder for exception recovery
+PUGI__NS_BEGIN
+ // Scope guard for a raw buffer: unless release() has been called, the
+ // destructor passes the buffer to the deleter supplied at construction.
+ struct buffer_holder
+ {
+ 	void* data;
+ 	void (*deleter)(void*);
+
+ 	buffer_holder(void* data_, void (*deleter_)(void*)): data(data_), deleter(deleter_)
+ 	{
+ 	}
+
+ 	~buffer_holder()
+ 	{
+ 		if (!data) return;
+
+ 		deleter(data);
+ 	}
+
+ 	// Gives up ownership: returns the buffer and leaves the holder empty.
+ 	void* release()
+ 	{
+ 		void* const released = data;
+
+ 		data = 0;
+
+ 		return released;
+ 	}
+ };
+PUGI__NS_END
+#endif
+
+PUGI__NS_BEGIN
+ // Size of a page's data area; PUGIXML_MEMORY_PAGE_SIZE overrides the
+ // default of 32768.
+ static const size_t xml_memory_page_size =
+ #ifdef PUGIXML_MEMORY_PAGE_SIZE
+ PUGIXML_MEMORY_PAGE_SIZE
+ #else
+ 32768
+ #endif
+ ;
+
+ // Pages are aligned to 64 bytes. Node and attribute headers combine the page
+ // address with flag/type values (see the struct constructors); the masks
+ // below separate the pointer part from the low six bits.
+ static const uintptr_t xml_memory_page_alignment = 64;
+ static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
+ static const uintptr_t xml_memory_page_contents_shared_mask = 32;
+ static const uintptr_t xml_memory_page_name_allocated_mask = 16;
+ static const uintptr_t xml_memory_page_value_allocated_mask = 8;
+ static const uintptr_t xml_memory_page_type_mask = 7;
+ static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
+ static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
+
+ // Extracts the node type from a header; the header stores (type - 1).
+ #define PUGI__NODETYPE(n) static_cast(((n)->header & impl::xml_memory_page_type_mask) + 1)
+
+ struct xml_allocator;
+
+ // Header placed at the start of every memory page. Pages form a doubly
+ // linked list through prev/next.
+ struct xml_memory_page
+ {
+ // Treats `memory` as a page header and zeroes every field.
+ static xml_memory_page* construct(void* memory)
+ {
+ xml_memory_page* result = static_cast(memory);
+
+ result->allocator = 0;
+ result->prev = 0;
+ result->next = 0;
+ result->busy_size = 0;
+ result->freed_size = 0;
+
+ return result;
+ }
+
+ xml_allocator* allocator;
+
+ xml_memory_page* prev;
+ xml_memory_page* next;
+
+ // Bytes handed out from this page / bytes given back so far.
+ size_t busy_size;
+ size_t freed_size;
+ };
+
+ // Bookkeeping stored immediately before every string allocated through
+ // xml_allocator::allocate_string.
+ struct xml_memory_string_header
+ {
+ uint16_t page_offset; // offset from page->data
+ uint16_t full_size; // 0 if string occupies whole page
+ };
+
+ // Page-based allocator. Small requests are carved sequentially out of the
+ // current page (_root); requests that do not fit go through
+ // allocate_memory_oob. A page is released once everything allocated from it
+ // has been freed, except the last page, which is only reset.
+ struct xml_allocator
+ {
+ xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
+ {
+ }
+
+ // Allocates a new page with room for data_size bytes after the header.
+ // Returns 0 when the underlying allocation fails.
+ xml_memory_page* allocate_page(size_t data_size)
+ {
+ size_t size = sizeof(xml_memory_page) + data_size;
+
+ // allocate block with some alignment, leaving memory for worst-case padding
+ void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
+ if (!memory) return 0;
+
+ // align to next page boundary (note: this guarantees at least 1 usable byte before the page)
+ char* page_memory = reinterpret_cast((reinterpret_cast(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ xml_memory_page* page = xml_memory_page::construct(page_memory);
+ assert(page);
+
+ page->allocator = _root->allocator;
+
+ // record the offset for freeing the memory block
+ assert(page_memory > memory && page_memory - static_cast(memory) <= 127);
+ page_memory[-1] = static_cast(page_memory - static_cast(memory));
+
+ return page;
+ }
+
+ // Frees a page created by allocate_page, using the offset stored in the
+ // byte just before the page to recover the original block address.
+ static void deallocate_page(xml_memory_page* page)
+ {
+ char* page_memory = reinterpret_cast(page);
+
+ xml_memory::deallocate(page_memory - page_memory[-1]);
+ }
+
+ void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+
+ // Returns `size` bytes and reports the owning page through out_page.
+ // Served from the current page when it fits, otherwise by
+ // allocate_memory_oob.
+ void* allocate_memory(size_t size, xml_memory_page*& out_page)
+ {
+ if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
+
+ void* buf = reinterpret_cast(_root) + sizeof(xml_memory_page) + _busy_size;
+
+ _busy_size += size;
+
+ out_page = _root;
+
+ return buf;
+ }
+
+ // Marks `size` bytes of `page` as freed. Once a page is entirely freed it
+ // is either reset (when it is the last page) or unlinked and deallocated.
+ void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
+ {
+ // _busy_size is the live counter for the current page; sync it first
+ if (page == _root) page->busy_size = _busy_size;
+
+ assert(ptr >= reinterpret_cast(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast(page) + sizeof(xml_memory_page) + page->busy_size);
+ (void)!ptr;
+
+ page->freed_size += size;
+ assert(page->freed_size <= page->busy_size);
+
+ if (page->freed_size == page->busy_size)
+ {
+ if (page->next == 0)
+ {
+ assert(_root == page);
+
+ // top page freed, just reset sizes
+ page->busy_size = page->freed_size = 0;
+ _busy_size = 0;
+ }
+ else
+ {
+ assert(_root != page);
+ assert(page->prev);
+
+ // remove from the list
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ // deallocate
+ deallocate_page(page);
+ }
+ }
+ }
+
+ // Allocates storage for `length` characters preceded by an
+ // xml_memory_string_header. Returns 0 when allocation fails.
+ char_t* allocate_string(size_t length)
+ {
+ PUGI__STATIC_ASSERT(xml_memory_page_size <= (1 << 16));
+
+ // allocate memory for string and header block
+ size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+
+ // round size up to pointer alignment boundary
+ size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
+
+ xml_memory_page* page;
+ xml_memory_string_header* header = static_cast(allocate_memory(full_size, page));
+
+ if (!header) return 0;
+
+ // setup header
+ ptrdiff_t page_offset = reinterpret_cast(header) - reinterpret_cast(page) - sizeof(xml_memory_page);
+
+ assert(page_offset >= 0 && page_offset < (1 << 16));
+ header->page_offset = static_cast(page_offset);
+
+ // full_size == 0 for large strings that occupy the whole page
+ assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
+ header->full_size = static_cast(full_size < (1 << 16) ? full_size : 0);
+
+ // round-trip through void* to avoid 'cast increases required alignment of target type' warning
+ // header is guaranteed a pointer-sized alignment, which should be enough for char_t
+ return static_cast(static_cast(header + 1));
+ }
+
+ // Releases a string obtained from allocate_string; the page and the size
+ // are recovered from the header stored in front of the string.
+ void deallocate_string(char_t* string)
+ {
+ // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
+ // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
+
+ // get header
+ xml_memory_string_header* header = static_cast(static_cast(string)) - 1;
+ assert(header);
+
+ // deallocate
+ size_t page_offset = sizeof(xml_memory_page) + header->page_offset;
+ xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset));
+
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
+
+ deallocate_memory(header, full_size, page);
+ }
+
+ // Current (last) page, and the number of bytes already used in it.
+ xml_memory_page* _root;
+ size_t _busy_size;
+ };
+
+ PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+ { // Serves a request of 'size' bytes from a newly allocated page; reports that page through out_page and returns the first byte after the page struct (0 if allocate_page fails).
+ const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ out_page = page;
+
+ if (!page) return 0;
+ // requests up to the threshold get a standard-size page that becomes the new _root; larger ones get a page sized to the request
+ if (size <= large_allocation_threshold)
+ {
+ _root->busy_size = _busy_size;
+
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
+
+ _busy_size = size;
+ }
+ else
+ {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
+
+ page->prev = _root->prev;
+ page->next = _root;
+
+ _root->prev->next = page;
+ _root->prev = page;
+ }
+
+ // allocate inside page: the first 'size' bytes after the page struct are recorded as busy
+ page->busy_size = size;
+
+ return reinterpret_cast(page) + sizeof(xml_memory_page);
+ }
+PUGI__NS_END
+
+namespace pugi
+{
+ /// A 'name=value' XML attribute structure.
+ struct xml_attribute_struct
+ {
+ /// Constructs an empty attribute: 'header' receives the address of the owning page, every pointer member starts as null.
+ xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
+ {
+ }
+ /// Owning page address; destroy_attribute also tests the name/value allocated-mask bits of this word.
+ uintptr_t header;
+
+ char_t* name; ///< Pointer to attribute name.
+ char_t* value; ///< Pointer to attribute value.
+
+ xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
+ xml_attribute_struct* next_attribute; ///< Next attribute
+ };
+
+ /// An XML document tree node.
+ struct xml_node_struct
+ {
+ /// Constructs a detached node: 'header' combines the owning page address with (type - 1); every link starts as null.
+ /// \param type - node type
+ xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
+ {
+ }
+ /// Owning page address OR-ed with (type - 1); destroy_node also tests the name/value allocated-mask bits of this word.
+ uintptr_t header;
+
+ xml_node_struct* parent; ///< Pointer to parent
+
+ char_t* name; ///< Pointer to element name.
+ char_t* value; ///< Pointer to any associated string data.
+
+ xml_node_struct* first_child; ///< First child
+
+ xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
+ xml_node_struct* next_sibling; ///< Right brother
+
+ xml_attribute_struct* first_attribute; ///< First attribute
+ };
+}
+
+PUGI__NS_BEGIN
+ struct xml_extra_buffer
+ { // Entry of a singly linked list of character buffers (see xml_document_struct::extra_buffers).
+ char_t* buffer;
+ xml_extra_buffer* next;
+ };
+
+ struct xml_document_struct: public xml_node_struct, public xml_allocator
+ { // Document root object: a node of type node_document that is also the allocator constructed over the same page.
+ xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
+ {
+ }
+ // both members start as null; NOTE(review): presumably 'buffer' is the parsed source text — confirm where it is assigned
+ const char_t* buffer;
+
+ xml_extra_buffer* extra_buffers;
+ };
+
+ inline xml_allocator& get_allocator(const xml_node_struct* node)
+ { // Returns the allocator recorded in the memory page that owns 'node'; 'node' must not be null.
+ assert(node);
+ // masking the header with xml_memory_page_pointer_mask leaves the page address
+ return *reinterpret_cast(node->header & xml_memory_page_pointer_mask)->allocator;
+ }
+
+ template inline xml_document_struct& get_document(const Object* object)
+ { // Returns the document owning 'object' (any type exposing a 'header' word); 'object' must not be null.
+ assert(object);
+ // the owning page's allocator pointer is converted to the document, which derives from xml_allocator
+ return *static_cast(reinterpret_cast(object->header & xml_memory_page_pointer_mask)->allocator);
+ }
+PUGI__NS_END
+
+// Low-level DOM operations
+PUGI__NS_BEGIN
+ inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+ { // Obtains storage for one xml_attribute_struct from 'alloc' and constructs it in place; returns 0 when no memory could be obtained.
+ xml_memory_page* page;
+ void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
+ // never construct into a null block: append_new_attribute tests this function's result against 0
+ return memory ? new (memory) xml_attribute_struct(page) : 0;
+ }
+
+ inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+ { // Obtains storage for one xml_node_struct from 'alloc' and constructs a node of 'type' in place; returns 0 when no memory could be obtained.
+ xml_memory_page* page;
+ void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
+ // never construct into a null block: append_new_node tests this function's result against 0
+ return memory ? new (memory) xml_node_struct(page, type) : 0;
+ }
+
+ inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+ { // Returns an attribute's memory to 'alloc'; it does not unlink the attribute from any list.
+ uintptr_t header = a->header;
+ // name/value strings are released only when the matching *_allocated_mask bit is set in the header
+ if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(a->name);
+ if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(a->value);
+ // the owning page is recovered from the header captured on entry
+ alloc.deallocate_memory(a, sizeof(xml_attribute_struct), reinterpret_cast(header & xml_memory_page_pointer_mask));
+ }
+
+ inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+ { // Recursively frees 'n': its flagged name/value strings, all attributes, all child subtrees and finally the node block; 'n' is not unlinked from its parent here.
+ uintptr_t header = n->header;
+
+ if (header & impl::xml_memory_page_name_allocated_mask) alloc.deallocate_string(n->name);
+ if (header & impl::xml_memory_page_value_allocated_mask) alloc.deallocate_string(n->value);
+ // walk the attribute list, reading each successor before the current attribute is destroyed
+ for (xml_attribute_struct* attr = n->first_attribute; attr; )
+ {
+ xml_attribute_struct* next = attr->next_attribute;
+
+ destroy_attribute(attr, alloc);
+
+ attr = next;
+ }
+ // walk the children the same way, recursing into each subtree
+ for (xml_node_struct* child = n->first_child; child; )
+ {
+ xml_node_struct* next = child->next_sibling;
+
+ destroy_node(child, alloc);
+
+ child = next;
+ }
+ // the owning page is recovered from the header captured on entry
+ alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast(header & xml_memory_page_pointer_mask));
+ }
+
+ inline void append_node(xml_node_struct* child, xml_node_struct* node)
+ { // Links 'child' as the last child of 'node'; child->next_sibling is left untouched.
+ child->parent = node;
+
+ xml_node_struct* head = node->first_child;
+
+ if (head)
+ {
+ xml_node_struct* tail = head->prev_sibling_c; // the first child's prev link points at the current last child
+
+ tail->next_sibling = child;
+ child->prev_sibling_c = tail;
+ head->prev_sibling_c = child;
+ }
+ else
+ {
+ node->first_child = child;
+ child->prev_sibling_c = child; // an only child is its own predecessor
+ }
+ }
+
+ inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
+ { // Links 'child' as the first child of 'node'.
+ child->parent = node;
+
+ xml_node_struct* head = node->first_child;
+
+ if (head)
+ {
+ child->prev_sibling_c = head->prev_sibling_c; // the new first child takes over the link to the last child
+ head->prev_sibling_c = child;
+ }
+ else
+ child->prev_sibling_c = child; // an only child is its own predecessor
+
+ child->next_sibling = head;
+ node->first_child = child;
+ }
+
+ inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
+ { // Links 'child' into the sibling list directly after 'node', under node's parent.
+ xml_node_struct* parent = node->parent;
+
+ child->parent = parent;
+ // fix the back link of whatever follows: the next sibling, or (when 'node' was last) the first child's link to the last child
+ if (node->next_sibling)
+ node->next_sibling->prev_sibling_c = child;
+ else
+ parent->first_child->prev_sibling_c = child;
+
+ child->next_sibling = node->next_sibling;
+ child->prev_sibling_c = node;
+
+ node->next_sibling = child;
+ }
+
+ inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
+ { // Links 'child' into the sibling list directly before 'node', under node's parent.
+ xml_node_struct* parent = node->parent;
+
+ child->parent = parent;
+ // the predecessor has a non-null next_sibling only when 'node' is not the first child; otherwise 'child' becomes the first child
+ if (node->prev_sibling_c->next_sibling)
+ node->prev_sibling_c->next_sibling = child;
+ else
+ parent->first_child = child;
+
+ child->prev_sibling_c = node->prev_sibling_c;
+ child->next_sibling = node;
+
+ node->prev_sibling_c = child;
+ }
+
+ inline void remove_node(xml_node_struct* node)
+ { // Unlinks 'node' from its parent's child list and clears its parent/sibling links; no memory is freed here.
+ xml_node_struct* parent = node->parent;
+ // repair the back link of the successor (or the first child's link to the last child when 'node' was last)
+ if (node->next_sibling)
+ node->next_sibling->prev_sibling_c = node->prev_sibling_c;
+ else
+ parent->first_child->prev_sibling_c = node->prev_sibling_c;
+ // repair the forward link of the predecessor (or the parent's first_child when 'node' was first)
+ if (node->prev_sibling_c->next_sibling)
+ node->prev_sibling_c->next_sibling = node->next_sibling;
+ else
+ parent->first_child = node->next_sibling;
+
+ node->parent = 0;
+ node->prev_sibling_c = 0;
+ node->next_sibling = 0;
+ }
+
+ inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ { // Links 'attr' as the last attribute of 'node'; attr->next_attribute is left untouched.
+ xml_attribute_struct* head = node->first_attribute;
+
+ if (head)
+ {
+ xml_attribute_struct* tail = head->prev_attribute_c; // the first attribute's prev link points at the current last one
+
+ tail->next_attribute = attr;
+ attr->prev_attribute_c = tail;
+ head->prev_attribute_c = attr;
+ }
+ else
+ {
+ node->first_attribute = attr;
+ attr->prev_attribute_c = attr; // an only attribute is its own predecessor
+ }
+ }
+
+ inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ { // Links 'attr' as the first attribute of 'node'.
+ xml_attribute_struct* head = node->first_attribute;
+
+ if (head)
+ {
+ attr->prev_attribute_c = head->prev_attribute_c; // the new first attribute takes over the link to the last one
+ head->prev_attribute_c = attr;
+ }
+ else
+ attr->prev_attribute_c = attr; // an only attribute is its own predecessor
+
+ attr->next_attribute = head;
+ node->first_attribute = attr;
+ }
+
+ inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+ { // Links 'attr' into node's attribute list directly after 'place'.
+ if (place->next_attribute)
+ place->next_attribute->prev_attribute_c = attr;
+ else
+ node->first_attribute->prev_attribute_c = attr; // 'place' was last: the first attribute's prev link must name the new last one
+
+ attr->next_attribute = place->next_attribute;
+ attr->prev_attribute_c = place;
+ place->next_attribute = attr;
+ }
+
+ inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
+ { // Links 'attr' into node's attribute list directly before 'place'.
+ if (place->prev_attribute_c->next_attribute)
+ place->prev_attribute_c->next_attribute = attr;
+ else
+ node->first_attribute = attr; // the predecessor has no successor, i.e. 'place' was first: 'attr' becomes the first attribute
+
+ attr->prev_attribute_c = place->prev_attribute_c;
+ attr->next_attribute = place;
+ place->prev_attribute_c = attr;
+ }
+
+ inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
+ { // Unlinks 'attr' from node's attribute list and clears its links; no memory is freed here.
+ if (attr->next_attribute)
+ attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
+ else
+ node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
+ // repair the forward link of the predecessor (or node->first_attribute when 'attr' was first)
+ if (attr->prev_attribute_c->next_attribute)
+ attr->prev_attribute_c->next_attribute = attr->next_attribute;
+ else
+ node->first_attribute = attr->next_attribute;
+
+ attr->prev_attribute_c = 0;
+ attr->next_attribute = 0;
+ }
+
+ PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+ { // Allocates a node of 'type' (node_element by default) and appends it as the last child of 'node'; returns the new node, or 0 if allocate_node returned 0.
+ xml_node_struct* child = allocate_node(alloc, type);
+ if (!child) return 0;
+
+ append_node(child, node);
+
+ return child;
+ }
+
+ PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
+ { // Allocates an attribute and appends it as the last attribute of 'node'; returns the new attribute, or 0 if allocate_attribute returned 0.
+ xml_attribute_struct* attr = allocate_attribute(alloc);
+ if (!attr) return 0;
+
+ append_attribute(attr, node);
+
+ return attr;
+ }
+PUGI__NS_END
+
+// Helper classes for code generation
+PUGI__NS_BEGIN
+ struct opt_false
+ { // Tag type whose compile-time 'value' is 0; passed to the convert_buffer_utf16/utf32 helpers as the "no byte swap" option.
+ enum { value = 0 };
+ };
+
+ struct opt_true
+ { // Tag type whose compile-time 'value' is 1; passed to the convert_buffer_utf16/utf32 helpers as the "byte swap" option.
+ enum { value = 1 };
+ };
+PUGI__NS_END
+
+// Unicode utilities
+PUGI__NS_BEGIN
+ inline uint16_t endian_swap(uint16_t value)
+ { // Returns 'value' with its two bytes exchanged.
+ return static_cast(((value & 0xff) << 8) | (value >> 8));
+ }
+
+ inline uint32_t endian_swap(uint32_t value)
+ { // Returns 'value' with the order of its four bytes reversed.
+ return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
+ }
+
+ struct utf8_counter
+ { // Decoder traits that accumulate the number of UTF-8 bytes each code point would occupy; nothing is written.
+ typedef size_t value_type;
+ // code points below U+10000: adds 1, 2 or 3 depending on the range
+ static value_type low(value_type result, uint32_t ch)
+ {
+ // U+0000..U+007F
+ if (ch < 0x80) return result + 1;
+ // U+0080..U+07FF
+ else if (ch < 0x800) return result + 2;
+ // U+0800..U+FFFF
+ else return result + 3;
+ }
+ // code points from U+10000 up: always adds 4
+ static value_type high(value_type result, uint32_t)
+ {
+ // U+10000..U+10FFFF
+ return result + 4;
+ }
+ };
+
+ struct utf8_writer
+ { // Decoder traits that emit each code point as UTF-8 bytes at 'result' and return the advanced output pointer; the caller provides enough room.
+ typedef uint8_t* value_type;
+ // code points below U+10000: writes 1 to 3 bytes
+ static value_type low(value_type result, uint32_t ch)
+ {
+ // U+0000..U+007F
+ if (ch < 0x80)
+ {
+ *result = static_cast(ch);
+ return result + 1;
+ }
+ // U+0080..U+07FF
+ else if (ch < 0x800)
+ {
+ result[0] = static_cast(0xC0 | (ch >> 6));
+ result[1] = static_cast(0x80 | (ch & 0x3F));
+ return result + 2;
+ }
+ // U+0800..U+FFFF
+ else
+ {
+ result[0] = static_cast(0xE0 | (ch >> 12));
+ result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F));
+ result[2] = static_cast(0x80 | (ch & 0x3F));
+ return result + 3;
+ }
+ }
+ // code points from U+10000 up: writes 4 bytes
+ static value_type high(value_type result, uint32_t ch)
+ {
+ // U+10000..U+10FFFF
+ result[0] = static_cast(0xF0 | (ch >> 18));
+ result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F));
+ result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F));
+ result[3] = static_cast(0x80 | (ch & 0x3F));
+ return result + 4;
+ }
+ // dispatches to low() or high() on the U+10000 boundary
+ static value_type any(value_type result, uint32_t ch)
+ {
+ return (ch < 0x10000) ? low(result, ch) : high(result, ch);
+ }
+ };
+
+ struct utf16_counter
+ { // Decoder traits that accumulate the number of UTF-16 units per code point: 1 via low(), 2 via high().
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t)
+ {
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t)
+ {
+ return result + 2;
+ }
+ };
+
+ struct utf16_writer
+ { // Decoder traits that emit each code point as UTF-16 units at 'result' and return the advanced output pointer.
+ typedef uint16_t* value_type;
+ // code points below U+10000 are stored as a single unit
+ static value_type low(value_type result, uint32_t ch)
+ {
+ *result = static_cast(ch);
+
+ return result + 1;
+ }
+ // code points from U+10000 up are split into a surrogate pair
+ static value_type high(value_type result, uint32_t ch)
+ {
+ uint32_t msh = static_cast(ch - 0x10000) >> 10;
+ uint32_t lsh = static_cast(ch - 0x10000) & 0x3ff;
+ // the upper bits go into the 0xD800-based unit, the lower 10 bits into the 0xDC00-based unit
+ result[0] = static_cast(0xD800 + msh);
+ result[1] = static_cast(0xDC00 + lsh);
+
+ return result + 2;
+ }
+ // dispatches to low() or high() on the U+10000 boundary
+ static value_type any(value_type result, uint32_t ch)
+ {
+ return (ch < 0x10000) ? low(result, ch) : high(result, ch);
+ }
+ };
+
+ struct utf32_counter
+ { // Decoder traits that count one UTF-32 unit per code point, whichever of low()/high() is called.
+ typedef size_t value_type;
+
+ static value_type low(value_type result, uint32_t)
+ {
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t)
+ {
+ return result + 1;
+ }
+ };
+
+ struct utf32_writer
+ { // Decoder traits that store each code point unchanged as one 32-bit unit; low(), high() and any() behave identically.
+ typedef uint32_t* value_type;
+
+ static value_type low(value_type result, uint32_t ch)
+ {
+ *result = ch;
+
+ return result + 1;
+ }
+
+ static value_type high(value_type result, uint32_t ch)
+ {
+ *result = ch;
+
+ return result + 1;
+ }
+
+ static value_type any(value_type result, uint32_t ch)
+ {
+ *result = ch;
+
+ return result + 1;
+ }
+ };
+
+ struct latin1_writer
+ { // Decoder traits that emit one byte per code point, replacing anything that does not fit in a byte with '?'.
+ typedef uint8_t* value_type;
+ // values up to 255 are stored as-is, larger ones become '?'
+ static value_type low(value_type result, uint32_t ch)
+ {
+ *result = static_cast(ch > 255 ? '?' : ch);
+
+ return result + 1;
+ }
+ // code points routed to high() are always written as '?'
+ static value_type high(value_type result, uint32_t ch)
+ {
+ (void)ch;
+
+ *result = '?';
+
+ return result + 1;
+ }
+ };
+
+ template struct wchar_selector;
+ // selector value 2: 16-bit units handled by the UTF-16 counter/writer
+ template <> struct wchar_selector<2>
+ {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+ };
+ // selector value 4: 32-bit units handled by the UTF-32 counter/writer
+ template <> struct wchar_selector<4>
+ {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+ };
+ // traits used for wchar_t text; NOTE(review): the selector argument is not visible here, presumably sizeof(wchar_t) — confirm
+ typedef wchar_selector::counter wchar_counter;
+ typedef wchar_selector::writer wchar_writer;
+
+ template struct utf_decoder
+ { // Decoders that walk an input block and feed every decoded code point to Traits::low (below U+10000) or Traits::high, threading 'result' through the calls and returning its final value.
+ static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ {
+ const uint8_t utf8_byte_mask = 0x3f;
+ // 'size' is in bytes; sequences that are malformed or cut off by the end of the block are skipped one byte at a time
+ while (size)
+ {
+ uint8_t lead = *data;
+
+ // 0xxxxxxx -> U+0000..U+007F
+ if (lead < 0x80)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+
+ // process aligned single-byte (ascii) blocks
+ if ((reinterpret_cast(data) & 3) == 0)
+ {
+ // round-trip through void* to silence 'cast increases required alignment of target type' warnings
+ while (size >= 4 && (*static_cast(static_cast(data)) & 0x80808080) == 0)
+ {
+ result = Traits::low(result, data[0]);
+ result = Traits::low(result, data[1]);
+ result = Traits::low(result, data[2]);
+ result = Traits::low(result, data[3]);
+ data += 4;
+ size -= 4;
+ }
+ }
+ }
+ // 110xxxxx -> U+0080..U+07FF
+ else if (static_cast(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+ data += 2;
+ size -= 2;
+ }
+ // 1110xxxx -> U+0800-U+FFFF
+ else if (static_cast(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+ data += 3;
+ size -= 3;
+ }
+ // 11110xxx -> U+10000..U+10FFFF
+ else if (static_cast(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
+ {
+ result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+ data += 4;
+ size -= 4;
+ }
+ // 10xxxxxx or 11111xxx -> invalid
+ else
+ {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+ // 'size' is in 16-bit units; each unit is byte-swapped first when opt_swap::value is set; unpaired surrogates are skipped
+ static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
+ {
+ const uint16_t* end = data + size;
+
+ while (data < end)
+ {
+ unsigned int lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+D7FF
+ if (lead < 0xD800)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+E000..U+FFFF
+ else if (static_cast(lead - 0xE000) < 0x2000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // surrogate pair lead
+ else if (static_cast(lead - 0xD800) < 0x400 && data + 1 < end)
+ {
+ uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+ if (static_cast(next - 0xDC00) < 0x400)
+ {
+ result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+ data += 2;
+ }
+ else
+ {
+ data += 1;
+ }
+ }
+ else
+ {
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+ // 'size' is in 32-bit units; each unit is byte-swapped first when opt_swap::value is set; no range validation is applied
+ static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
+ {
+ const uint32_t* end = data + size;
+
+ while (data < end)
+ {
+ uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+FFFF
+ if (lead < 0x10000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+10000..U+10FFFF
+ else
+ {
+ result = Traits::high(result, lead);
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+ // every input byte is passed to Traits::low as a code point of the same value
+ static inline typename Traits::value_type decode_latin1_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ {
+ for (size_t i = 0; i < size; ++i)
+ {
+ result = Traits::low(result, data[i]);
+ }
+
+ return result;
+ }
+ // overload pair: the pointer type chosen by the caller selects UTF-16 or UTF-32 decoding
+ static inline typename Traits::value_type decode_wchar_block_impl(const uint16_t* data, size_t size, typename Traits::value_type result)
+ {
+ return decode_utf16_block(data, size, result);
+ }
+
+ static inline typename Traits::value_type decode_wchar_block_impl(const uint32_t* data, size_t size, typename Traits::value_type result)
+ {
+ return decode_utf32_block(data, size, result);
+ }
+ // reinterprets the wchar_t input as the wchar_selector unit type and forwards to the matching overload above
+ static inline typename Traits::value_type decode_wchar_block(const wchar_t* data, size_t size, typename Traits::value_type result)
+ {
+ return decode_wchar_block_impl(reinterpret_cast::type*>(data), size, result);
+ }
+ };
+
+ template PUGI__FN void convert_utf_endian_swap(T* result, const T* data, size_t length)
+ { // Writes the byte-swapped form of each of the 'length' input units to 'result'.
+ for (size_t i = 0; i < length; ++i) result[i] = endian_swap(data[i]);
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+ { // Writes the byte-swapped form of each of the 'length' wchar_t units to 'result'; convert_buffer_endian_swap also calls it with result == data.
+ for (size_t i = 0; i < length; ++i) result[i] = static_cast(endian_swap(static_cast::type>(data[i])));
+ }
+#endif
+PUGI__NS_END
+
+PUGI__NS_BEGIN
+ enum chartype_t
+ { // Bit flags stored per character in chartype_table; each trailing comment lists the characters that carry the flag.
+ ct_parse_pcdata = 1, // \0, &, \r, <
+ ct_parse_attr = 2, // \0, &, \r, ', "
+ ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
+ ct_space = 8, // \r, \n, space, tab
+ ct_parse_cdata = 16, // \0, ], >, \r
+ ct_parse_comment = 32, // \0, -, >, \r
+ ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
+ };
+
+ static const unsigned char chartype_table[256] =
+ { // Entry i is the OR of the chartype_t flags that apply to character value i; queried through PUGI__IS_CHARTYPE.
+ 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
+ };
+
+ enum chartypex_t
+ { // Bit flags stored per character in chartypex_table; each trailing comment lists the characters that carry the flag.
+ ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+ ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 8, // 0-9
+ ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+ };
+
+ static const unsigned char chartypex_table[256] =
+ { // Entry i is the OR of the chartypex_t flags that apply to character value i; queried through PUGI__IS_CHARTYPEX.
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
+ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+ };
+
+#ifdef PUGIXML_WCHAR_MODE // wide-character build: every character value of 128 or more is classified through table entry 128
+ #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? table[static_cast(c)] : table[128]) & (ct))
+#else
+ #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast(c)] & (ct))
+#endif
+ // shorthands binding the generic test to each table; the result is the masked flag bits (non-zero when the character has flag 'ct')
+ #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
+ #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+ PUGI__FN bool is_little_endian()
+ { // Detects the host byte order at run time by inspecting the in-memory representation of the value 1.
+ unsigned int ui = 1;
+ // NOTE(review): the cast target is not visible here; presumably the lowest-addressed byte is read — confirm
+ return *reinterpret_cast(&ui) == 1;
+ }
+
+ PUGI__FN xml_encoding get_wchar_encoding()
+ { // Maps wchar_t to a concrete encoding: UTF-16 for 2-byte wchar_t, UTF-32 for 4-byte, in the byte order reported by is_little_endian().
+ PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+ if (sizeof(wchar_t) == 2)
+ return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ else
+ return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ }
+
+ PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+ {
+ // look for BOM in first few bytes
+ if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
+ if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
+ if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
+ if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
+ if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+
+ // look for <, or (contents);
+
+ PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
+
+ return guess_buffer_encoding(d0, d1, d2, d3);
+ }
+
+ PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ { // Produces a writable char_t view of 'contents' ('size' bytes): the input itself when is_mutable, otherwise a zero-terminated copy; returns false only if the copy cannot be allocated.
+ size_t length = size / sizeof(char_t);
+
+ if (is_mutable)
+ {
+ out_buffer = static_cast(const_cast(contents));
+ out_length = length;
+ }
+ else
+ {
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+ // a null 'contents' is accepted only together with a zero length
+ if (contents)
+ memcpy(buffer, contents, length * sizeof(char_t));
+ else
+ assert(length == 0);
+
+ buffer[length] = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // the reported length of a copy includes the terminator
+ }
+
+ return true;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+ { // True when 'le' and 're' are the two byte orders of the same encoding (UTF-16 BE/LE or UTF-32 BE/LE).
+ return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
+ (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
+ }
+
+ PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ { // Byte-swaps every char_t unit of 'contents': in place when is_mutable, otherwise into a zero-terminated copy; returns false only if the copy cannot be allocated.
+ const char_t* data = static_cast(contents);
+ size_t length = size / sizeof(char_t);
+
+ if (is_mutable)
+ {
+ char_t* buffer = const_cast(data);
+ // source and destination are the same memory here
+ convert_wchar_endian_swap(buffer, data, length);
+
+ out_buffer = buffer;
+ out_length = length;
+ }
+ else
+ {
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ convert_wchar_endian_swap(buffer, data, length);
+ buffer[length] = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // the reported length of a copy includes the terminator
+ }
+
+ return true;
+ }
+
+ PUGI__FN bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ { // Converts 'size' bytes of UTF-8 into a newly allocated, zero-terminated wchar_t buffer; returns false if the allocation fails.
+ const uint8_t* data = static_cast(contents);
+ size_t data_length = size;
+
+ // first pass: get length in wchar_t units
+ size_t length = utf_decoder::decode_utf8_block(data, data_length, 0);
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf8 input to wchar_t
+ wchar_writer::value_type obegin = reinterpret_cast(buffer);
+ wchar_writer::value_type oend = utf_decoder::decode_utf8_block(data, data_length, obegin);
+ // both passes must agree on the output size
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ template PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ { // Converts 'size' bytes of UTF-16 into a newly allocated, zero-terminated wchar_t buffer; the unnamed opt_swap tag argument only selects the template variant. Returns false if the allocation fails.
+ const uint16_t* data = static_cast(contents);
+ size_t data_length = size / sizeof(uint16_t);
+
+ // first pass: get length in wchar_t units
+ size_t length = utf_decoder::decode_utf16_block(data, data_length, 0);
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf16 input to wchar_t
+ wchar_writer::value_type obegin = reinterpret_cast(buffer);
+ wchar_writer::value_type oend = utf_decoder::decode_utf16_block(data, data_length, obegin);
+ // both passes must agree on the output size
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ template PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ { // Converts 'size' bytes of UTF-32 into a newly allocated, zero-terminated wchar_t buffer; the unnamed opt_swap tag argument only selects the template variant. Returns false if the allocation fails.
+ const uint32_t* data = static_cast(contents);
+ size_t data_length = size / sizeof(uint32_t);
+
+ // first pass: get length in wchar_t units
+ size_t length = utf_decoder::decode_utf32_block(data, data_length, 0);
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf32 input to wchar_t
+ wchar_writer::value_type obegin = reinterpret_cast(buffer);
+ wchar_writer::value_type oend = utf_decoder::decode_utf32_block(data, data_length, obegin);
+ // both passes must agree on the output size
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ { // Converts 'size' bytes of Latin-1 into a newly allocated, zero-terminated wchar_t buffer (one output unit per input byte); returns false if the allocation fails.
+ const uint8_t* data = static_cast(contents);
+ size_t data_length = size;
+
+ // get length in wchar_t units
+ size_t length = data_length;
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // convert latin1 input to wchar_t
+ wchar_writer::value_type obegin = reinterpret_cast(buffer);
+ wchar_writer::value_type oend = utf_decoder::decode_latin1_block(data, data_length, obegin);
+
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ { // Wide-character build: turns 'contents' in the given concrete 'encoding' into a wchar_t buffer by dispatching to the matching helper; returns false on allocation failure or an unhandled encoding.
+ // get native encoding
+ xml_encoding wchar_encoding = get_wchar_encoding();
+
+ // fast path: no conversion required
+ if (encoding == wchar_encoding) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+ // only endian-swapping is required
+ if (need_endian_swap_utf(encoding, wchar_encoding)) return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+
+ // source encoding is utf8
+ if (encoding == encoding_utf8) return convert_buffer_utf8(out_buffer, out_length, contents, size);
+
+ // source encoding is utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+ // opt_true requests byte swapping when the source byte order differs from the host's
+ return (native_encoding == encoding) ?
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ }
+
+ // source encoding is utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+ // opt_true requests byte swapping when the source byte order differs from the host's
+ return (native_encoding == encoding) ?
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+ }
+
+ // source encoding is latin1
+ if (encoding == encoding_latin1) return convert_buffer_latin1(out_buffer, out_length, contents, size);
+ // every other encoding value is a caller error
+ assert(!"Invalid encoding");
+ return false;
+ }
+#else
+ template PUGI__FN bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ { // Converts 'size' bytes of UTF-16 into a newly allocated, zero-terminated UTF-8 buffer; the unnamed opt_swap tag argument only selects the template variant. Returns false if the allocation fails.
+ const uint16_t* data = static_cast(contents);
+ size_t data_length = size / sizeof(uint16_t);
+
+ // first pass: get length in utf8 units
+ size_t length = utf_decoder::decode_utf16_block(data, data_length, 0);
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf16 input to utf8
+ uint8_t* obegin = reinterpret_cast(buffer);
+ uint8_t* oend = utf_decoder::decode_utf16_block(data, data_length, obegin);
+ // both passes must agree on the output size
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ template PUGI__FN bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ { // Converts 'size' bytes of UTF-32 into a newly allocated, zero-terminated UTF-8 buffer; the unnamed opt_swap tag argument only selects the template variant. Returns false if the allocation fails.
+ const uint32_t* data = static_cast(contents);
+ size_t data_length = size / sizeof(uint32_t);
+
+ // first pass: get length in utf8 units
+ size_t length = utf_decoder::decode_utf32_block(data, data_length, 0);
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert utf32 input to utf8
+ uint8_t* obegin = reinterpret_cast(buffer);
+ uint8_t* oend = utf_decoder::decode_utf32_block(data, data_length, obegin);
+ // both passes must agree on the output size
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
+ { // Returns the index of the first byte greater than 127, or 'size' when every byte is 7-bit.
+ for (size_t i = 0; i < size; ++i)
+ if (data[i] > 127)
+ return i;
+
+ return size;
+ }
+
+ PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ { // Converts 'size' bytes of Latin-1 to UTF-8; pure 7-bit input is handed to get_mutable_buffer unchanged, otherwise a new zero-terminated buffer is built. Returns false if an allocation fails.
+ const uint8_t* data = static_cast(contents);
+ size_t data_length = size;
+
+ // get size of prefix that does not need utf8 conversion
+ size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
+ assert(prefix_length <= data_length);
+
+ const uint8_t* postfix = data + prefix_length;
+ size_t postfix_length = data_length - prefix_length;
+
+ // if no conversion is needed, just return the original buffer
+ if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+ // first pass: get length in utf8 units
+ size_t length = prefix_length + utf_decoder::decode_latin1_block(postfix, postfix_length, 0);
+
+ // allocate buffer of suitable length
+ char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t)));
+ if (!buffer) return false;
+
+ // second pass: convert latin1 input to utf8
+ memcpy(buffer, data, prefix_length);
+ // the 7-bit prefix was copied verbatim above; only the remainder goes through the decoder
+ uint8_t* obegin = reinterpret_cast(buffer);
+ uint8_t* oend = utf_decoder::decode_latin1_block(postfix, postfix_length, obegin + prefix_length);
+
+ assert(oend == obegin + length);
+ *oend = 0;
+
+ out_buffer = buffer;
+ out_length = length + 1; // includes the terminator
+
+ return true;
+ }
+
+ // Dispatches input conversion on the source encoding.
+ // UTF-8 input is handed to get_mutable_buffer unchanged. UTF-16 and UTF-32 input is
+ // converted with the opt_false tag when the requested byte order equals the host
+ // order and with the opt_true tag otherwise. Latin-1 input goes through
+ // convert_buffer_latin1. Any other encoding trips the assert and yields false.
+ PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+     // fast path: no conversion required
+     if (encoding == encoding_utf8)
+         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+     // source encoding is utf16
+     if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+     {
+         xml_encoding host_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+         if (host_utf16 == encoding)
+             return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     // source encoding is utf32
+     if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+     {
+         xml_encoding host_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+         if (host_utf32 == encoding)
+             return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     // source encoding is latin1
+     if (encoding == encoding_latin1)
+         return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
+
+     assert(!"Invalid encoding");
+     return false;
+ }
+#endif
+
+ // First half of a wchar_t -> UTF-8 conversion: returns the value produced by
+ // decode_wchar_block when it is given a null (0) output argument, which the comment
+ // below describes as the length in utf8 characters.
+ // NOTE(review): the template arguments of utf_decoder look like they were stripped
+ // from this copy of the source - verify against upstream.
+ PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
+ {
+ // get length in utf8 characters
+ return utf_decoder::decode_wchar_block(str, length, 0);
+ }
+
+ // Second half of a wchar_t -> UTF-8 conversion: decodes str[0..length) into buffer
+ // and writes a zero terminator at buffer[size].
+ // size must be the length the conversion produces (asserted below), and buffer must
+ // therefore have room for size + 1 chars.
+ // NOTE(review): the cast/template argument lists look like they were stripped from
+ // this copy of the source - verify against upstream.
+ PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
+ {
+ // convert to utf8
+ uint8_t* begin = reinterpret_cast(buffer);
+ uint8_t* end = utf_decoder::decode_wchar_block(str, length, begin);
+
+ assert(begin + size == end);
+ (void)!end; // keeps 'end' referenced when assert compiles to nothing
+
+ // zero-terminate
+ buffer[size] = 0;
+ }
+
+#ifndef PUGIXML_NO_STL
+ // Converts str[0..length) to a UTF-8 std::string.
+ // The required size is measured with as_utf8_begin, a string of that size is
+ // created, and as_utf8_end fills it in place (skipped when the size is zero).
+ PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
+ {
+     // first pass: get length in utf8 characters
+     const size_t utf8_size = as_utf8_begin(str, length);
+
+     // allocate resulting string, zero-filled to the final size
+     std::string converted(utf8_size, '\0');
+
+     // second pass: convert to utf8
+     if (utf8_size != 0)
+         as_utf8_end(&converted[0], utf8_size, str, length);
+
+     return converted;
+ }
+
+ // Converts the UTF-8 bytes str[0..size) into a wide string.
+ // decode_utf8_block is run twice over the same input: first with a null (0) output
+ // argument to obtain the length in wchar_t units, then writing into the resized
+ // result (the second pass is skipped when the length is zero).
+ // NOTE(review): the template arguments of std::basic_string, utf_decoder and the
+ // casts look like they were stripped from this copy of the source - verify against
+ // upstream.
+ PUGI__FN std::basic_string as_wide_impl(const char* str, size_t size)
+ {
+ const uint8_t* data = reinterpret_cast(str);
+
+ // first pass: get length in wchar_t units
+ size_t length = utf_decoder::decode_utf8_block(data, size, 0);
+
+ // allocate resulting string
+ std::basic_string result;
+ result.resize(length);
+
+ // second pass: convert to wchar_t
+ if (length > 0)
+ {
+ wchar_writer::value_type begin = reinterpret_cast(&result[0]);
+ wchar_writer::value_type end = utf_decoder::decode_utf8_block(data, size, begin);
+
+ assert(begin + length == end);
+ (void)!end; // keeps 'end' referenced when assert compiles to nothing
+ }
+
+ return result;
+ }
+#endif
+
+ // Decides whether a string of 'length' characters may be written over 'target'
+ // instead of allocating new storage.
+ //  - never when the header carries xml_memory_page_contents_shared_mask;
+ //  - never when the current target string is shorter than 'length';
+ //  - always (if it fits) when the header_mask bit is clear;
+ //  - otherwise only if the target is shorter than 32 characters or less than half
+ //    of it would be left unused.
+ inline bool strcpy_insitu_allow(size_t length, uintptr_t header, uintptr_t header_mask, char_t* target)
+ {
+     // never reuse shared memory
+     if (header & xml_memory_page_contents_shared_mask)
+         return false;
+
+     const size_t capacity = strlength(target);
+
+     // the new contents have to fit in every remaining case
+     if (capacity < length)
+         return false;
+
+     // always reuse document buffer memory if possible
+     if ((header & header_mask) == 0)
+         return true;
+
+     // reuse heap memory if waste is not too great
+     const size_t reuse_threshold = 32;
+
+     if (capacity < reuse_threshold)
+         return true;
+
+     const size_t wasted = capacity - length;
+
+     return wasted < capacity / 2;
+ }
+
+ // Assigns 'source' to the string slot 'dest', whose allocation state is tracked by
+ // the header_mask bit of 'header'.
+ //  - empty source: the old string is deallocated if the bit is set, dest becomes
+ //    null and the bit is cleared;
+ //  - dest exists and strcpy_insitu_allow accepts it: source is copied over it;
+ //  - otherwise: a new string is allocated and filled, the old one is deallocated if
+ //    the bit is set, and the bit is set.
+ // Returns false only when allocate_string fails; dest and header are untouched then.
+ // The allocator is reached through the page pointer stored in 'header'.
+ // NOTE(review): the target type of the reinterpret_cast on 'header' looks like it
+ // was stripped from this copy of the source - verify against upstream.
+ PUGI__FN bool strcpy_insitu(char_t*& dest, uintptr_t& header, uintptr_t header_mask, const char_t* source)
+ {
+ assert(header);
+
+ size_t source_length = strlength(source);
+
+ if (source_length == 0)
+ {
+ // empty string and null pointer are equivalent, so just deallocate old memory
+ xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator;
+
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ // mark the string as not allocated
+ dest = 0;
+ header &= ~header_mask;
+
+ return true;
+ }
+ else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
+ {
+ // we can reuse old buffer, so just copy the new data (including zero terminator)
+ memcpy(dest, source, (source_length + 1) * sizeof(char_t));
+
+ return true;
+ }
+ else
+ {
+ xml_allocator* alloc = reinterpret_cast(header & xml_memory_page_pointer_mask)->allocator;
+
+ // allocate new buffer
+ char_t* buf = alloc->allocate_string(source_length + 1);
+ if (!buf) return false;
+
+ // copy the string (including zero terminator)
+ memcpy(buf, source, (source_length + 1) * sizeof(char_t));
+
+ // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
+ if (header & header_mask) alloc->deallocate_string(dest);
+
+ // the string is now allocated, so set the flag
+ dest = buf;
+ header |= header_mask;
+
+ return true;
+ }
+ }
+
+ // Bookkeeping for in-place string compaction.
+ // While a string is rewritten in its own buffer, characters that are dropped leave
+ // "gaps". 'end' points just past the most recent gap and 'size' is the total number
+ // of characters dropped so far, so the data that still has to be moved starts at
+ // 'end' and belongs at 'end - size'.
+ // NOTE(review): the target types of the reinterpret_casts in the memmove calls look
+ // like they were stripped from this copy of the source - verify against upstream.
+ struct gap
+ {
+ char_t* end; // one past the last gap; null until the first push
+ size_t size; // accumulated length of all gaps, in characters
+
+ gap(): end(0), size(0)
+ {
+ }
+
+ // Push new gap, move s count bytes further (skipping the gap).
+ // Collapse previous gap.
+ // s is taken by reference and is advanced past the new gap.
+ void push(char_t*& s, size_t count)
+ {
+ if (end) // there was a gap already; collapse it
+ {
+ // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end));
+ }
+
+ s += count; // end of current gap
+
+ // "merge" two gaps
+ end = s;
+ size += count;
+ }
+
+ // Collapse all gaps, return past-the-end pointer
+ // (s itself when no gap was ever pushed, otherwise s - size).
+ char_t* flush(char_t* s)
+ {
+ if (end)
+ {
+ // Move [old_gap_end, current_pos) to [old_gap_start, ...)
+ assert(s >= end);
+ memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end));
+
+ return s - size;
+ }
+ else return s;
+ }
+ };
+
+ // Decodes one character or entity reference in place.
+ // s is expected to point at the '&' that starts the reference (the function looks at
+ // s + 1 onwards). Recognized forms are numeric references ('&#' followed by decimal
+ // digits, or '&#x' followed by hex digits, each ended by ';') and the named entities
+ // amp, apos, gt, lt and quot.
+ // On success the decoded character is written starting at s, the unused remainder of
+ // the reference is registered with g as a gap, and the position right after the ';'
+ // is returned.
+ // If the reference is malformed or unknown nothing is written and the position at
+ // which scanning stopped is returned, so the text stays as it was.
+ // NOTE(review): the argument lists of the casts in this function look like they were
+ // stripped from this copy of the source - verify against upstream.
+ PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
+ {
+ char_t* stre = s + 1; // scan position inside the reference
+
+ switch (*stre)
+ {
+ case '#': // '&#...'
+ {
+ unsigned int ucsc = 0; // accumulated code point value
+
+ if (stre[1] == 'x') // '&#x...' (hex code)
+ {
+ stre += 2;
+
+ char_t ch = *stre;
+
+ // a reference without any digit is left untouched
+ if (ch == ';') return stre;
+
+ for (;;)
+ {
+ if (static_cast(ch - '0') <= 9)
+ ucsc = 16 * ucsc + (ch - '0');
+ else if (static_cast((ch | ' ') - 'a') <= 5) // (ch | ' ') folds 'A'-'F' onto 'a'-'f'
+ ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ }
+ else // '&#...' (dec code)
+ {
+ char_t ch = *++stre;
+
+ // a reference without any digit is left untouched
+ if (ch == ';') return stre;
+
+ for (;;)
+ {
+ if (static_cast(static_cast(ch) - '0') <= 9)
+ ucsc = 10 * ucsc + (ch - '0');
+ else if (ch == ';')
+ break;
+ else // cancel
+ return stre;
+
+ ch = *++stre;
+ }
+
+ ++stre;
+ }
+
+ // write the code point at s; s ends up just past what was written
+ #ifdef PUGIXML_WCHAR_MODE
+ s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc));
+ #else
+ s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc));
+ #endif
+
+ // whatever is left of the reference text becomes a gap
+ g.push(s, stre - s);
+ return stre;
+ }
+
+ case 'a': // '&a'
+ {
+ ++stre;
+
+ if (*stre == 'm') // '&am'
+ {
+ if (*++stre == 'p' && *++stre == ';') // '&amp;'
+ {
+ *s++ = '&';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ else if (*stre == 'p') // '&ap'
+ {
+ if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '&apos;'
+ {
+ *s++ = '\'';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ }
+ break;
+ }
+
+ case 'g': // '&g'
+ {
+ if (*++stre == 't' && *++stre == ';') // '&gt;'
+ {
+ *s++ = '>';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'l': // '&l'
+ {
+ if (*++stre == 't' && *++stre == ';') // '&lt;'
+ {
+ *s++ = '<';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ case 'q': // '&q'
+ {
+ if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // '&quot;'
+ {
+ *s++ = '"';
+ ++stre;
+
+ g.push(s, stre - s);
+ return stre;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ // not a recognized reference: resume after the characters examined so far
+ return stre;
+ }
+
+ // Parser utilities
+ // These macros are not self-contained: they read and write variables of the
+ // enclosing function by name (s, ss, ch, endch, optmsk, cursor, alloc, error_offset,
+ // error_status).
+ // ENDSWITH: true when c equals e, or when c is the terminator (0) and the saved last
+ // character 'endch' equals e.
+ #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
+ // SKIPWS: advance s over characters classified as ct_space.
+ #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
+ // OPTSET: non-zero when any bit of OPT is set in optmsk.
+ #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
+ // PUSHNODE: append a child of TYPE to cursor and descend into it; reports
+ // status_out_of_memory when append_new_node returns null.
+ #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
+ // POPNODE: move cursor back to its parent.
+ #define PUGI__POPNODE() { cursor = cursor->parent; }
+ // SCANFOR: advance s until X holds or the terminator is reached.
+ #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
+ // SCANWHILE: advance s while X holds (X itself has to fail on the terminator).
+ #define PUGI__SCANWHILE(X) { while (X) ++s; }
+ // SCANWHILE_UNROLL: like SCANWHILE but tests four characters per iteration; X must be
+ // written in terms of 'ss', the character currently being tested.
+ #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
+ // ENDSEG: save the current character in ch, overwrite it with a terminator and step
+ // past it.
+ #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
+ // THROW_ERROR: record the error position and status, then return 0 from the
+ // enclosing function (no C++ exception is involved).
+ // NOTE(review): the target type of the static_cast looks like it was stripped from
+ // this copy of the source - verify against upstream.
+ #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast(0)
+ // CHECK_ERROR: report err at m when s has reached the terminator.
+ #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
+
+ // Normalizes line endings inside a comment body in place and locates its end.
+ // Every '\r' becomes '\n', and a '\n' that directly follows it is dropped.
+ // The comment ends at "--" followed by '>', or by the buffer terminator when the
+ // saved last character endch is '>'. At that point the text is zero-terminated and
+ // the position after the end marker is returned.
+ // Returns 0 if the buffer ends before an end marker is found.
+ PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
+ {
+     gap g;
+
+     for (;;)
+     {
+         // skip everything that cannot be a line break, a dash or the terminator
+         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
+
+         const char_t current = *s;
+
+         // ran out of input before the end marker
+         if (current == 0)
+             return 0;
+
+         if (current == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+         {
+             *s++ = '\n'; // replace first one with 0x0a
+
+             if (*s == '\n') g.push(s, 1);
+
+             continue;
+         }
+
+         if (current == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
+         {
+             *g.flush(s) = 0;
+
+             return s + (s[2] == '>' ? 3 : 2);
+         }
+
+         ++s;
+     }
+ }
+
+ // Normalizes line endings inside a CDATA section in place and locates its end.
+ // Every '\r' becomes '\n', and a '\n' that directly follows it is dropped.
+ // The section ends at "]]" followed by '>', or by the buffer terminator when the
+ // saved last character endch is '>'. At that point the text is zero-terminated and
+ // the position one past the first ']' is returned; the caller steps over the rest.
+ // Returns 0 if the buffer ends before an end marker is found.
+ PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
+ {
+     gap g;
+
+     for (;;)
+     {
+         // skip everything that cannot be a line break, a bracket or the terminator
+         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
+
+         const char_t current = *s;
+
+         // ran out of input before the end marker
+         if (current == 0)
+             return 0;
+
+         if (current == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+         {
+             *s++ = '\n'; // replace first one with 0x0a
+
+             if (*s == '\n') g.push(s, 1);
+
+             continue;
+         }
+
+         if (current == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
+         {
+             *g.flush(s) = 0;
+
+             return s + 1;
+         }
+
+         ++s;
+     }
+ }
+
+ // Signature shared by all PCDATA converters: takes the start of the text and returns
+ // the position at which parsing continues.
+ typedef char_t* (*strconv_pcdata_t)(char_t*);
+
+ // PCDATA converter specialized at compile time by three option types, each exposing
+ // a boolean 'value': opt_trim (strip trailing whitespace), opt_eol (normalize line
+ // endings) and opt_escape (decode '&' references).
+ // NOTE(review): the template parameter list looks like it was stripped from this copy
+ // of the source - verify against upstream.
+ template struct strconv_pcdata_impl
+ {
+ // Converts the text starting at s in place and zero-terminates it.
+ // Stops at '<' (returns the position after it) or at the buffer terminator (returns
+ // the position of the terminator).
+ static char_t* parse(char_t* s)
+ {
+ gap g;
+
+ char_t* begin = s; // lower bound for trailing-whitespace trimming
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
+
+ if (*s == '<') // PCDATA ends here
+ {
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s + 1;
+ }
+ else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
+ {
+ *s++ = '\n'; // replace first one with 0x0a
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (*s == 0)
+ {
+ // end of buffer: finish the text exactly as for '<', but do not step past the terminator
+ char_t* end = g.flush(s);
+
+ if (opt_trim::value)
+ while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
+ --end;
+
+ *end = 0;
+
+ return s;
+ }
+ else ++s;
+ }
+ }
+ };
+
+ // Selects the PCDATA converter that matches the parse options in optmask.
+ // The switch index packs three flags: bit 0 = parse_escapes (0x10), bit 1 = parse_eol
+ // (0x20), bit 2 = parse_trim_pcdata (0x0800); the static assert pins those values
+ // because the shifts below depend on them.
+ // NOTE(review): all eight cases read identically in this copy of the source, so the
+ // template arguments that distinguish them appear to have been stripped - verify
+ // against upstream.
+ PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
+ {
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
+
+ switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
+ {
+ case 0: return strconv_pcdata_impl::parse;
+ case 1: return strconv_pcdata_impl::parse;
+ case 2: return strconv_pcdata_impl::parse;
+ case 3: return strconv_pcdata_impl::parse;
+ case 4: return strconv_pcdata_impl::parse;
+ case 5: return strconv_pcdata_impl::parse;
+ case 6: return strconv_pcdata_impl::parse;
+ case 7: return strconv_pcdata_impl::parse;
+ default: assert(false); return 0; // should not get here
+ }
+ }
+
+ // Signature shared by all attribute value converters: takes the start of the value
+ // (just after the opening quote) and the quote character that ends it.
+ typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
+
+ // Attribute value converters, specialized at compile time by opt_escape (a type with
+ // a boolean 'value' that enables decoding of '&' references).
+ // Every parse_* function rewrites the value in place, zero-terminates it and returns
+ // the position after the closing quote, or 0 if the buffer ends before that quote.
+ // NOTE(review): the template parameter list looks like it was stripped from this copy
+ // of the source - verify against upstream.
+ template struct strconv_attribute_impl
+ {
+ // Whitespace normalization: leading and trailing whitespace is removed and every
+ // inner run of whitespace collapses into a single space.
+ static char_t* parse_wnorm(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ // trim leading whitespaces
+ if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ char_t* str = s;
+
+ do ++str;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ g.push(s, str - s);
+ }
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
+
+ if (*s == end_quote)
+ {
+ char_t* str = g.flush(s);
+
+ // terminate, then walk backwards zeroing trailing whitespace
+ do *str-- = 0;
+ while (PUGI__IS_CHARTYPE(*str, ct_space));
+
+ return s + 1;
+ }
+ else if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ *s++ = ' ';
+
+ // drop the rest of the whitespace run
+ if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ char_t* str = s + 1;
+ while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
+
+ g.push(s, str - s);
+ }
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // Whitespace conversion: every whitespace character becomes a space; a "\r\n" pair
+ // becomes a single space.
+ static char_t* parse_wconv(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (PUGI__IS_CHARTYPE(*s, ct_space))
+ {
+ if (*s == '\r')
+ {
+ *s++ = ' ';
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else *s++ = ' ';
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // End-of-line normalization: '\r' becomes '\n' and a '\n' directly after it is
+ // dropped; other characters are kept.
+ static char_t* parse_eol(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (*s == '\r')
+ {
+ *s++ = '\n';
+
+ if (*s == '\n') g.push(s, 1);
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+
+ // No whitespace processing: only '&' references are decoded (when enabled).
+ static char_t* parse_simple(char_t* s, char_t end_quote)
+ {
+ gap g;
+
+ while (true)
+ {
+ PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
+
+ if (*s == end_quote)
+ {
+ *g.flush(s) = 0;
+
+ return s + 1;
+ }
+ else if (opt_escape::value && *s == '&')
+ {
+ s = strconv_escape(s, g);
+ }
+ else if (!*s)
+ {
+ return 0;
+ }
+ else ++s;
+ }
+ }
+ };
+
+ // Selects the attribute value converter that matches the parse options in optmask.
+ // The switch index is bits 4..7 of optmask: parse_escapes (0x10), parse_eol (0x20),
+ // parse_wconv_attribute (0x40) and parse_wnorm_attribute (0x80); the static assert
+ // pins those values because the shift below depends on them.
+ // As the table shows, wnorm takes precedence over wconv, and wconv over eol.
+ // NOTE(review): cases that differ only in the escapes bit read identically in this
+ // copy of the source, so the template argument that distinguishes them appears to
+ // have been stripped - verify against upstream.
+ PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
+ {
+ PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
+
+ switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
+ {
+ case 0: return strconv_attribute_impl::parse_simple;
+ case 1: return strconv_attribute_impl::parse_simple;
+ case 2: return strconv_attribute_impl::parse_eol;
+ case 3: return strconv_attribute_impl::parse_eol;
+ case 4: return strconv_attribute_impl::parse_wconv;
+ case 5: return strconv_attribute_impl::parse_wconv;
+ case 6: return strconv_attribute_impl::parse_wconv;
+ case 7: return strconv_attribute_impl::parse_wconv;
+ case 8: return strconv_attribute_impl::parse_wnorm;
+ case 9: return strconv_attribute_impl::parse_wnorm;
+ case 10: return strconv_attribute_impl::parse_wnorm;
+ case 11: return strconv_attribute_impl::parse_wnorm;
+ case 12: return strconv_attribute_impl::parse_wnorm;
+ case 13: return strconv_attribute_impl::parse_wnorm;
+ case 14: return strconv_attribute_impl::parse_wnorm;
+ case 15: return strconv_attribute_impl::parse_wnorm;
+ default: assert(false); return 0; // should not get here
+ }
+ }
+
+ // Builds an xml_parse_result carrying the given status and offset (offset defaults
+ // to 0); no other field is assigned here.
+ inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
+ {
+     xml_parse_result outcome;
+
+     outcome.offset = offset;
+     outcome.status = status;
+
+     return outcome;
+ }
+
+ struct xml_parser
+ {
+ xml_allocator alloc;
+ char_t* error_offset;
+ xml_parse_status error_status;
+
+ xml_parser(const xml_allocator& alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
+ {
+ }
+
+ // DOCTYPE consists of nested sections of the following possible types:
+ // <!-- ... -->, <? ... ?>, "...", '...'
+ // <![...]]>
+ // <!...>
+ // First group can not contain nested groups
+ // Second group can contain nested groups of the same type
+ // Third group can contain all other groups
+ // Skips one primitive DOCTYPE section that starts at s: a quoted string ("..." or
+ // '...'), a processing instruction (<? ... ?>) or a comment (<!-- ... -->).
+ // Returns the position right after the section. If the section is not terminated
+ // before the end of the buffer, or s does not start any of these sections,
+ // status_bad_doctype is recorded at the current position and 0 is returned.
+ char_t* parse_doctype_primitive(char_t* s)
+ {
+     if (*s == '"' || *s == '\'')
+     {
+         // quoted string
+         char_t ch = *s++;
+         PUGI__SCANFOR(*s == ch);
+         if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+         s++;
+     }
+     else if (s[0] == '<' && s[1] == '?')
+     {
+         // <? ... ?>
+         s += 2;
+         PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
+         if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+         s += 2;
+     }
+     else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
+     {
+         // <!-- ... -->
+         s += 4;
+         PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
+         if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+         // The terminator '-->' is three characters long. Advancing by four would skip
+         // the character that follows the comment and, when the comment is the last
+         // thing in the buffer, step past the zero terminator.
+         s += 3;
+     }
+     else PUGI__THROW_ERROR(status_bad_doctype, s);
+
+     return s;
+ }
+
+ // Skips a '<![ ... ]]>' section of a DOCTYPE, including any sections of the same
+ // kind nested inside it. s must point at the opening '<![' (asserted).
+ // Returns the position right after the ']]>' that closes the outermost section; if
+ // the buffer ends first, status_bad_doctype is recorded at the end of the buffer and
+ // 0 is returned.
+ // Nesting is tracked with a counter rather than by recursion, so the amount of stack
+ // used does not depend on how deeply the input nests these sections.
+ char_t* parse_doctype_ignore(char_t* s)
+ {
+     // number of nested sections that are open in addition to the outermost one
+     size_t depth = 0;
+
+     assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
+     s += 3;
+
+     while (*s)
+     {
+         if (s[0] == '<' && s[1] == '!' && s[2] == '[')
+         {
+             // nested ignore section
+             s += 3;
+             depth++;
+         }
+         else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
+         {
+             // ignore section end
+             s += 3;
+
+             if (depth == 0)
+                 return s;
+
+             depth--;
+         }
+         else s++;
+     }
+
+     PUGI__THROW_ERROR(status_bad_doctype, s);
+ }
+
+ // Skips a '<! ... >' group of a DOCTYPE together with everything nested in it:
+ // further '<! ... >' groups, '<![ ... ]]>' sections (via parse_doctype_ignore) and
+ // primitive sections (via parse_doctype_primitive).
+ // s must point at the group opener (asserted). On success the position of the '>'
+ // that closes the outermost group is returned - the '>' itself is not consumed.
+ // If the buffer ends first, the group is accepted only when this is the top-level
+ // call, no nested group is still open and the saved last character endch is '>';
+ // the position of the terminator is returned then. In every other case
+ // status_bad_doctype is recorded and 0 is returned.
+ // Nested groups are tracked with a counter rather than by recursion, so the amount
+ // of stack used does not depend on how deeply the input nests them.
+ char_t* parse_doctype_group(char_t* s, char_t endch, bool toplevel)
+ {
+     // number of nested '<!' groups that are open in addition to the outermost one
+     size_t depth = 0;
+
+     assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
+     s += 2;
+
+     while (*s)
+     {
+         if (s[0] == '<' && s[1] == '!' && s[2] != '-')
+         {
+             if (s[2] == '[')
+             {
+                 // ignore
+                 s = parse_doctype_ignore(s);
+                 if (!s) return s;
+             }
+             else
+             {
+                 // some control group: step over its '<!' and remember that it is open
+                 s += 2;
+                 depth++;
+             }
+         }
+         else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
+         {
+             // unknown tag (forbidden), or some primitive group
+             s = parse_doctype_primitive(s);
+             if (!s) return s;
+         }
+         else if (*s == '>')
+         {
+             // the outermost '>' is left for the caller
+             if (depth == 0)
+                 return s;
+
+             // skip > of a nested group
+             depth--;
+             s++;
+         }
+         else s++;
+     }
+
+     if (depth != 0 || !toplevel || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
+
+     return s;
+ }
+
+ // Parses a construct that begins with '<!': a comment, a CDATA section or a DOCTYPE.
+ // s points at the '!'. cursor is the node that receives a child when the matching
+ // parse option (parse_comments, parse_cdata, parse_doctype) is set in optmsk;
+ // otherwise the construct is only skipped. endch is the last character of the
+ // original buffer, which was replaced by the zero terminator.
+ // Returns the position after the construct; on malformed input the error status and
+ // offset are recorded and 0 is returned.
+ // NOTE(review): in the previous copy of this function the comment and CDATA openers
+ // (the text between a '<' inside a comment and the '>' of the following
+ // 'cursor->value') were missing, leaving the braces unbalanced. They are restored
+ // here to match upstream pugixml - diff against the upstream release before merging.
+ char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
+ {
+     // parse node contents, starting with exclamation mark
+     ++s;
+
+     if (*s == '-') // '<!-...'
+     {
+         ++s;
+
+         if (*s == '-') // '<!--...'
+         {
+             ++s;
+
+             if (PUGI__OPTSET(parse_comments))
+             {
+                 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
+                 cursor->value = s; // Save the offset.
+             }
+
+             if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
+             {
+                 s = strconv_comment(s, endch);
+
+                 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
+             }
+             else
+             {
+                 // Scan for terminating '-->'.
+                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
+                 PUGI__CHECK_ERROR(status_bad_comment, s);
+
+                 if (PUGI__OPTSET(parse_comments))
+                     *s = 0; // Zero-terminate this segment at the first terminating '-'.
+
+                 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
+             }
+         }
+         else PUGI__THROW_ERROR(status_bad_comment, s);
+     }
+     else if (*s == '[')
+     {
+         // '<![CDATA[...'
+         if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
+         {
+             ++s;
+
+             if (PUGI__OPTSET(parse_cdata))
+             {
+                 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
+                 cursor->value = s; // Save the offset.
+
+                 if (PUGI__OPTSET(parse_eol))
+                 {
+                     s = strconv_cdata(s, endch);
+
+                     if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
+                 }
+                 else
+                 {
+                     // Scan for terminating ']]>'.
+                     PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+                     PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+                     *s++ = 0; // Zero-terminate this segment.
+                 }
+             }
+             else // Flagged for discard, but we still have to scan for the terminator.
+             {
+                 // Scan for terminating ']]>'.
+                 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
+                 PUGI__CHECK_ERROR(status_bad_cdata, s);
+
+                 ++s;
+             }
+
+             s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
+         }
+         else PUGI__THROW_ERROR(status_bad_cdata, s);
+     }
+     else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
+     {
+         // back up to the '<' so that the group parser sees the whole '<!DOCTYPE'
+         s -= 2;
+
+         // DOCTYPE is only allowed at document level
+         if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
+
+         // first character after '<!DOCTYPE' (2 + 7 characters)
+         char_t* mark = s + 9;
+
+         s = parse_doctype_group(s, endch, true);
+         if (!s) return s;
+
+         assert((*s == 0 && endch == '>') || *s == '>');
+         if (*s) *s++ = 0; // terminate the doctype text at its closing '>'
+
+         if (PUGI__OPTSET(parse_doctype))
+         {
+             while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
+
+             PUGI__PUSHNODE(node_doctype);
+
+             cursor->value = mark;
+         }
+     }
+     else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
+     else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
+     else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+
+     return s;
+ }
+
+ // Parses a construct that begins with '<?': a processing instruction or the XML
+ // declaration. s points at the '?'.
+ // A target that is exactly three characters long and spells "xml" in any letter case
+ // is treated as a declaration. A node is created only when the matching option
+ // (parse_declaration or parse_pi) is set in optmsk; otherwise the construct is
+ // skipped up to its closing '?>'.
+ // For a declaration with content the function returns with ref_cursor left at the
+ // new declaration node and s at the start of the content, whose closing '?' has been
+ // replaced by '/', so that the caller can parse the content as attributes.
+ // Returns the position to continue from; on malformed input the error status and
+ // offset are recorded and 0 is returned (ref_cursor is not updated in that case).
+ char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
+ {
+ // load into registers
+ xml_node_struct* cursor = ref_cursor;
+ char_t ch = 0;
+
+ // parse node contents, starting with question mark
+ ++s;
+
+ // read PI target
+ char_t* target = s;
+
+ if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ // determine node type; stricmp / strcasecmp is not portable
+ // ((c | ' ') maps an ASCII upper-case letter onto its lower-case form)
+ bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
+
+ if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
+ {
+ if (declaration)
+ {
+ // disallow non top-level declarations
+ if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
+
+ PUGI__PUSHNODE(node_declaration);
+ }
+ else
+ {
+ PUGI__PUSHNODE(node_pi);
+ }
+
+ cursor->name = target;
+
+ // terminate the target name; ch receives the character that followed it
+ PUGI__ENDSEG();
+
+ // parse value/attributes
+ if (ch == '?')
+ {
+ // empty node
+ if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
+ s += (*s == '>');
+
+ PUGI__POPNODE();
+ }
+ else if (PUGI__IS_CHARTYPE(ch, ct_space))
+ {
+ PUGI__SKIPWS();
+
+ // scan for tag end
+ char_t* value = s;
+
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ if (declaration)
+ {
+ // replace ending ? with / so that 'element' terminates properly
+ *s = '/';
+
+ // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
+ s = value;
+ }
+ else
+ {
+ // store value and step over >
+ cursor->value = value;
+ PUGI__POPNODE();
+
+ PUGI__ENDSEG();
+
+ s += (*s == '>');
+ }
+ }
+ else PUGI__THROW_ERROR(status_bad_pi, s);
+ }
+ else
+ {
+ // scan for tag end
+ PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
+ PUGI__CHECK_ERROR(status_bad_pi, s);
+
+ s += (s[1] == '>' ? 2 : 1);
+ }
+
+ // store from registers
+ ref_cursor = cursor;
+
+ return s;
+ }
+
+ // Main parsing loop: walks the zero-terminated buffer starting at s and builds the
+ // node tree below root, storing names and values as pointers into the buffer (which
+ // is modified in place).
+ // optmsk holds the parse options; endch is the last character of the original
+ // buffer, which was replaced by the zero terminator, and is consulted wherever a
+ // construct may legitimately end on that character.
+ // Returns the final position on success. On malformed input or allocation failure
+ // the error status and offset are recorded in the parser and 0 is returned.
+ // NOTE(review): in the previous copy of this function the '<!' dispatch, the two
+ // error branches that follow it and the beginning of the PCDATA branch were missing
+ // (the text between a '<' inside a comment and the '>' of 'cursor->first_child').
+ // They are restored here to match upstream pugixml - diff against the upstream
+ // release before merging.
+ char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
+ {
+     strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
+     strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
+
+     char_t ch = 0;
+     xml_node_struct* cursor = root;
+     char_t* mark = s;
+
+     while (*s != 0)
+     {
+         if (*s == '<')
+         {
+             ++s;
+
+         LOC_TAG:
+             if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
+             {
+                 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
+
+                 cursor->name = s;
+
+                 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+                 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+                 if (ch == '>')
+                 {
+                     // end of tag
+                 }
+                 else if (PUGI__IS_CHARTYPE(ch, ct_space))
+                 {
+                 LOC_ATTRIBUTES:
+                     while (true)
+                     {
+                         PUGI__SKIPWS(); // Eat any whitespace.
+
+                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
+                         {
+                             xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
+                             if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
+
+                             a->name = s; // Save the offset.
+
+                             PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
+                             PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
+
+                             if (PUGI__IS_CHARTYPE(ch, ct_space))
+                             {
+                                 PUGI__SKIPWS(); // Eat any whitespace.
+
+                                 ch = *s;
+                                 ++s;
+                             }
+
+                             if (ch == '=') // '<... #=...'
+                             {
+                                 PUGI__SKIPWS(); // Eat any whitespace.
+
+                                 if (*s == '"' || *s == '\'') // '<... #="...'
+                                 {
+                                     ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
+                                     ++s; // Step over the quote.
+                                     a->value = s; // Save the offset.
+
+                                     s = strconv_attribute(s, ch);
+
+                                     if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
+
+                                     // After this line the loop continues from the start;
+                                     // Whitespaces, / and > are ok, symbols and EOF are wrong,
+                                     // everything else will be detected
+                                     if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
+                                 }
+                                 else PUGI__THROW_ERROR(status_bad_attribute, s);
+                             }
+                             else PUGI__THROW_ERROR(status_bad_attribute, s);
+                         }
+                         else if (*s == '/')
+                         {
+                             ++s;
+
+                             if (*s == '>')
+                             {
+                                 PUGI__POPNODE();
+                                 s++;
+                                 break;
+                             }
+                             else if (*s == 0 && endch == '>')
+                             {
+                                 PUGI__POPNODE();
+                                 break;
+                             }
+                             else PUGI__THROW_ERROR(status_bad_start_element, s);
+                         }
+                         else if (*s == '>')
+                         {
+                             ++s;
+
+                             break;
+                         }
+                         else if (*s == 0 && endch == '>')
+                         {
+                             break;
+                         }
+                         else PUGI__THROW_ERROR(status_bad_start_element, s);
+                     }
+
+                     // !!!
+                 }
+                 else if (ch == '/') // '<#.../'
+                 {
+                     if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
+
+                     PUGI__POPNODE(); // Pop.
+
+                     s += (*s == '>');
+                 }
+                 else if (ch == 0)
+                 {
+                     // we stepped over null terminator, backtrack & handle closing tag
+                     --s;
+
+                     if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
+                 }
+                 else PUGI__THROW_ERROR(status_bad_start_element, s);
+             }
+             else if (*s == '/')
+             {
+                 ++s;
+
+                 // the closing tag has to repeat the name of the element being closed
+                 char_t* name = cursor->name;
+                 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+                 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
+                 {
+                     if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+                 }
+
+                 if (*name)
+                 {
+                     if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
+                     else PUGI__THROW_ERROR(status_end_element_mismatch, s);
+                 }
+
+                 PUGI__POPNODE(); // Pop.
+
+                 PUGI__SKIPWS();
+
+                 if (*s == 0)
+                 {
+                     if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+                 }
+                 else
+                 {
+                     if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
+                     ++s;
+                 }
+             }
+             else if (*s == '?') // '<?...'
+             {
+                 s = parse_question(s, cursor, optmsk, endch);
+                 if (!s) return s;
+
+                 // a declaration with content is continued as an attribute list
+                 assert(cursor);
+                 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
+             }
+             else if (*s == '!') // '<!...'
+             {
+                 s = parse_exclamation(s, cursor, optmsk, endch);
+                 if (!s) return s;
+             }
+             else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
+             else PUGI__THROW_ERROR(status_unrecognized_tag, s);
+         }
+         else
+         {
+             mark = s; // Save this offset while searching for a terminator.
+
+             PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
+
+             if (*s == '<' || !*s)
+             {
+                 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
+                 assert(mark != s);
+
+                 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
+                 {
+                     continue;
+                 }
+                 else if (PUGI__OPTSET(parse_ws_pcdata_single))
+                 {
+                     if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
+                 }
+             }
+
+             // unless trimming is requested, the text starts at the skipped whitespace
+             if (!PUGI__OPTSET(parse_trim_pcdata))
+                 s = mark;
+
+             if (cursor->parent || PUGI__OPTSET(parse_fragment))
+             {
+                 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
+                 cursor->value = s; // Save the offset.
+
+                 s = strconv_pcdata(s);
+
+                 PUGI__POPNODE(); // Pop since this is a standalone.
+
+                 if (!*s) break;
+             }
+             else
+             {
+                 PUGI__SCANFOR(*s == '<'); // '...<'
+                 if (!*s) break;
+
+                 ++s;
+             }
+
+             // We're after '<'
+             goto LOC_TAG;
+         }
+     }
+
+     // check that last tag is closed
+     if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
+
+     return s;
+ }
+
+ // parse_skip_bom returns s advanced past a leading byte order mark, or s unchanged
+ // when there is none. Two variants exist, selected by PUGIXML_WCHAR_MODE.
+ #ifdef PUGIXML_WCHAR_MODE
+ // Wide-character variant: the mark is the single unit 0xfeff.
+ // NOTE(review): the target type of the static_cast looks like it was stripped from
+ // this copy of the source - verify against upstream.
+ static char_t* parse_skip_bom(char_t* s)
+ {
+ unsigned int bom = 0xfeff;
+ return (s[0] == static_cast(bom)) ? s + 1 : s;
+ }
+ #else
+ // Narrow-character variant: the mark is the three-byte sequence EF BB BF.
+ static char_t* parse_skip_bom(char_t* s)
+ {
+ return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
+ }
+ #endif
+
+ // Reports whether node, or any node reachable from it through next_sibling, is an
+ // element node. A null node yields false.
+ static bool has_element_node_siblings(xml_node_struct* node)
+ {
+     for (xml_node_struct* it = node; it; it = it->next_sibling)
+     {
+         if (PUGI__NODETYPE(it) == node_element)
+             return true;
+     }
+
+     return false;
+ }
+
+ // Entry point of the parser: parses buffer[0..length) into nodes appended below root.
+ // The buffer is modified in place: its last character is saved in endch and
+ // overwritten with a zero terminator before parsing starts.
+ // An empty buffer is accepted only with parse_fragment; without it the result is
+ // status_no_document_element, which is also reported when parsing succeeded but
+ // added no element node directly below root.
+ // The returned result carries the parser's status and the error offset relative to
+ // the start of buffer (0 when no offset was recorded).
+ // NOTE(review): the target types of the static_casts in this function look like they
+ // were stripped from this copy of the source - verify against upstream.
+ static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
+ {
+ // allocator object is a part of document object
+ xml_allocator& alloc_ = *static_cast(xmldoc);
+
+ // early-out for empty documents
+ if (length == 0)
+ return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
+
+ // get last child of the root before parsing
+ // (used below to find the first child added by this call)
+ xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c : 0;
+
+ // create parser on stack
+ xml_parser parser(alloc_);
+
+ // save last character and make buffer zero-terminated (speeds up parsing)
+ char_t endch = buffer[length - 1];
+ buffer[length - 1] = 0;
+
+ // skip BOM to make sure it does not end up as part of parse output
+ char_t* buffer_data = parse_skip_bom(buffer);
+
+ // perform actual parsing
+ // (the return value is not needed: the outcome is read from parser.error_status)
+ parser.parse_tree(buffer_data, root, optmsk, endch);
+
+ // update allocator state
+ alloc_ = parser.alloc;
+
+ xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
+ assert(result.offset >= 0 && static_cast(result.offset) <= length);
+
+ if (result)
+ {
+ // since we removed last character, we have to handle the only possible false positive (stray <)
+ if (endch == '<')
+ return make_parse_result(status_unrecognized_tag, length - 1);
+
+ // check if there are any element nodes parsed
+ xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling : root->first_child;
+
+ if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
+ return make_parse_result(status_no_document_element, length - 1);
+ }
+ else
+ {
+ // roll back offset if it occurs on a null terminator in the source buffer
+ if (result.offset > 0 && static_cast(result.offset) == length - 1 && endch == 0)
+ result.offset--;
+ }
+
+ return result;
+ }
+ };
+
+ // Output facilities
+ // Returns the encoding in which char_t text is held: the result of
+ // get_wchar_encoding() when PUGIXML_WCHAR_MODE is defined, encoding_utf8 otherwise.
+ PUGI__FN xml_encoding get_write_native_encoding()
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+     const xml_encoding native = get_wchar_encoding();
+ #else
+     const xml_encoding native = encoding_utf8;
+ #endif
+
+     return native;
+ }
+
+ // Resolves a requested output encoding to a concrete one.
+ //  - encoding_wchar becomes the result of get_wchar_encoding();
+ //  - encoding_utf16 / encoding_utf32 become the variant with the host byte order;
+ //  - encoding_auto becomes encoding_utf8;
+ //  - every other value is returned unchanged.
+ PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
+ {
+     // replace wchar encoding with utf implementation
+     if (encoding == encoding_wchar)
+         return get_wchar_encoding();
+
+     // replace utf16/utf32 encoding with the variant of specific endianness
+     if (encoding == encoding_utf16)
+     {
+         if (is_little_endian()) return encoding_utf16_le;
+
+         return encoding_utf16_be;
+     }
+
+     if (encoding == encoding_utf32)
+     {
+         if (is_little_endian()) return encoding_utf32_le;
+
+         return encoding_utf32_be;
+     }
+
+     // only do autodetection if no explicit encoding is requested; assume utf8 then
+     return (encoding == encoding_auto) ? encoding_utf8 : encoding;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Wide-character variant: returns how many leading characters of data[0..length) can
+ // be converted without splitting a surrogate pair.
+ // Yields 0 for an empty range. When wchar_t is 2 bytes wide and the last character
+ // lies in 0xD800..0xDBFF (a lead surrogate), that character is left out; otherwise
+ // the whole length is returned.
+ // NOTE(review): the target types of the static_casts look like they were stripped
+ // from this copy of the source - verify against upstream.
+ PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+ {
+ if (length < 1) return 0;
+
+ // discard last character if it's the lead of a surrogate pair
+ return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
+ }
+
+ // Wide-character variant: converts data[0..length) to the requested output encoding
+ // and returns the number of bytes produced.
+ // r_char, r_u8, r_u16 and r_u32 are the destination, one parameter per element type;
+ // only the one matching the chosen conversion is written (presumably they all refer
+ // to the same scratch storage - TODO confirm against the caller).
+ // Asserts and returns 0 for an encoding that is not handled.
+ // NOTE(review): the template arguments of utf_decoder and of the static_casts look
+ // like they were stripped from this copy of the source, which is why the utf8 and
+ // latin1 branches read identically here - verify against upstream.
+ PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+ {
+ // only endian-swapping is required
+ if (need_endian_swap_utf(encoding, get_wchar_encoding()))
+ {
+ convert_wchar_endian_swap(r_char, data, length);
+
+ return length * sizeof(char_t);
+ }
+
+ // convert to utf8
+ if (encoding == encoding_utf8)
+ {
+ uint8_t* dest = r_u8;
+ uint8_t* end = utf_decoder::decode_wchar_block(data, length, dest);
+
+ return static_cast(end - dest);
+ }
+
+ // convert to utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ uint16_t* dest = r_u16;
+
+ // convert to native utf16
+ uint16_t* end = utf_decoder::decode_wchar_block(data, length, dest);
+
+ // swap if necessary
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest));
+
+ return static_cast(end - dest) * sizeof(uint16_t);
+ }
+
+ // convert to utf32
+ if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ {
+ uint32_t* dest = r_u32;
+
+ // convert to native utf32
+ uint32_t* end = utf_decoder::decode_wchar_block(data, length, dest);
+
+ // swap if necessary
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast(end - dest));
+
+ return static_cast(end - dest) * sizeof(uint32_t);
+ }
+
+ // convert to latin1
+ if (encoding == encoding_latin1)
+ {
+ uint8_t* dest = r_u8;
+ uint8_t* end = utf_decoder::decode_wchar_block(data, length, dest);
+
+ return static_cast(end - dest);
+ }
+
+ assert(!"Invalid encoding");
+ return 0;
+ }
+#else
+ // Returns the length of a prefix of `data` that ends on a UTF-8 sequence boundary.
+ // Inputs shorter than 5 characters yield 0. Otherwise the prefix is cut just before
+ // the last lead or standalone byte found among the final four bytes, so at least one
+ // byte is always held back unless the tail is four continuation bytes.
+ PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
+ {
+ 	if (length < 5) return 0;
+
+ 	for (size_t i = 1; i <= 4; ++i)
+ 	{
+ 		uint8_t ch = static_cast<uint8_t>(data[length - i]);
+
+ 		// either a standalone character or a leading one
+ 		// (continuation bytes have the bit pattern 10xxxxxx)
+ 		if ((ch & 0xc0) != 0x80) return length - i;
+ 	}
+
+ 	// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
+ 	return length;
+ }
+
+ // Converts `length` UTF-8 code units from `data` into `encoding` (UTF-16, UTF-32 or
+ // Latin-1), writing into the matching scratch pointer; the char_t scratch pointer is unused.
+ // Returns the number of BYTES produced; asserts and returns 0 for any other encoding.
+ // NOTE(review): the utf_decoder writer policy names and the reinterpret_cast target type
+ // were restored from context (template arguments were missing) - confirm against utf_decoder.
+ PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
+ {
+ 	if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ 	{
+ 		uint16_t* dest = r_u16;
+
+ 		// convert to native utf16
+ 		uint16_t* end = utf_decoder<utf16_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+
+ 		// swap if necessary
+ 		xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ 		if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+
+ 		return static_cast<size_t>(end - dest) * sizeof(uint16_t);
+ 	}
+
+ 	if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+ 	{
+ 		uint32_t* dest = r_u32;
+
+ 		// convert to native utf32
+ 		uint32_t* end = utf_decoder<utf32_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+
+ 		// swap if necessary
+ 		xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+ 		if (native_encoding != encoding) convert_utf_endian_swap(dest, dest, static_cast<size_t>(end - dest));
+
+ 		return static_cast<size_t>(end - dest) * sizeof(uint32_t);
+ 	}
+
+ 	if (encoding == encoding_latin1)
+ 	{
+ 		uint8_t* dest = r_u8;
+ 		uint8_t* end = utf_decoder<latin1_writer>::decode_utf8_block(reinterpret_cast<const uint8_t*>(data), length, dest);
+
+ 		return static_cast<size_t>(end - dest);
+ 	}
+
+ 	assert(!"Invalid encoding");
+ 	return 0;
+ }
+#endif
+
+ // Buffers char_t output in a fixed-size member array and forwards it to an xml_writer.
+ // When the target encoding differs from get_write_native_encoding(), data is converted
+ // through the `scratch` union before being written.
+ class xml_buffered_writer
+ {
+ // copy operations are declared private and have no definition in this class body
+ xml_buffered_writer(const xml_buffered_writer&);
+ xml_buffered_writer& operator=(const xml_buffered_writer&);
+
+ public:
+ // Binds to writer_ and resolves the output encoding from user_encoding via get_write_encoding().
+ xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
+ {
+ PUGI__STATIC_ASSERT(bufcapacity >= 8);
+ }
+
+ // Writes out whatever is still buffered.
+ ~xml_buffered_writer()
+ {
+ flush();
+ }
+
+ // Flushes the internal buffer and empties it.
+ // Always returns 0 so callers can write `offset = flush()` to reset their local offset.
+ size_t flush()
+ {
+ flush(buffer, bufsize);
+ bufsize = 0;
+ return 0;
+ }
+
+ // Writes `size` characters (char_t units) from `data` to the underlying writer,
+ // converting them to the target encoding if needed. Does not touch bufsize.
+ void flush(const char_t* data, size_t size)
+ {
+ if (size == 0) return;
+
+ // fast path, just write data
+ if (encoding == get_write_native_encoding())
+ writer.write(data, size * sizeof(char_t));
+ else
+ {
+ // convert chunk
+ size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
+ assert(result <= sizeof(scratch));
+
+ // write data
+ // (result is a byte count; data_u8 aliases the start of the scratch union)
+ writer.write(scratch.data_u8, result);
+ }
+ }
+
+ // Flushes the buffer, then outputs `length` characters from `data`: chunks larger than
+ // the buffer are written (or converted and written) immediately, and the remaining
+ // tail of at most bufcapacity characters is copied into the now-empty buffer.
+ void write_direct(const char_t* data, size_t length)
+ {
+ // flush the remaining buffer contents
+ flush();
+
+ // handle large chunks
+ if (length > bufcapacity)
+ {
+ if (encoding == get_write_native_encoding())
+ {
+ // fast path, can just write data chunk
+ writer.write(data, length * sizeof(char_t));
+ return;
+ }
+
+ // need to convert in suitable chunks
+ while (length > bufcapacity)
+ {
+ // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
+ // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
+ size_t chunk_size = get_valid_length(data, bufcapacity);
+ assert(chunk_size);
+
+ // convert chunk and write
+ flush(data, chunk_size);
+
+ // iterate
+ data += chunk_size;
+ length -= chunk_size;
+ }
+
+ // small tail is copied below
+ bufsize = 0;
+ }
+
+ memcpy(buffer + bufsize, data, length * sizeof(char_t));
+ bufsize += length;
+ }
+
+ // Appends `length` characters to the buffer if they fit; otherwise defers to write_direct().
+ void write_buffer(const char_t* data, size_t length)
+ {
+ size_t offset = bufsize;
+
+ if (offset + length <= bufcapacity)
+ {
+ memcpy(buffer + offset, data, length * sizeof(char_t));
+ bufsize = offset + length;
+ }
+ else
+ {
+ write_direct(data, length);
+ }
+ }
+
+ // Writes a null-terminated string. Characters are copied into the buffer until either
+ // the string ends or the buffer becomes full; in the latter case the rest goes through
+ // write_direct().
+ void write_string(const char_t* data)
+ {
+ // write the part of the string that fits in the buffer
+ size_t offset = bufsize;
+
+ while (*data && offset < bufcapacity)
+ buffer[offset++] = *data++;
+
+ // write the rest
+ if (offset < bufcapacity)
+ {
+ bufsize = offset;
+ }
+ else
+ {
+ // backtrack a bit if we have split the codepoint
+ // (`length` counts the characters copied by the loop above; `extra` is how many of
+ // them are dropped from the buffer again and re-sent via write_direct)
+ size_t length = offset - bufsize;
+ size_t extra = length - get_valid_length(data - length, length);
+
+ bufsize = offset - extra;
+
+ write_direct(data - extra, strlength(data) + extra);
+ }
+ }
+
+ // The write(d0, ...) overloads below append 1 to 6 characters, flushing first when
+ // fewer than that many free slots remain in the buffer.
+ void write(char_t d0)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 1) offset = flush();
+
+ buffer[offset + 0] = d0;
+ bufsize = offset + 1;
+ }
+
+ void write(char_t d0, char_t d1)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 2) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ bufsize = offset + 2;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 3) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ bufsize = offset + 3;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 4) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ bufsize = offset + 4;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 5) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ bufsize = offset + 5;
+ }
+
+ void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
+ {
+ size_t offset = bufsize;
+ if (offset > bufcapacity - 6) offset = flush();
+
+ buffer[offset + 0] = d0;
+ buffer[offset + 1] = d1;
+ buffer[offset + 2] = d2;
+ buffer[offset + 3] = d3;
+ buffer[offset + 4] = d4;
+ buffer[offset + 5] = d5;
+ bufsize = offset + 6;
+ }
+
+ // utf8 maximum expansion: x4 (-> utf32)
+ // utf16 maximum expansion: x2 (-> utf32)
+ // utf32 maximum expansion: x1
+ // bufcapacitybytes is the byte budget (overridable via PUGIXML_MEMORY_OUTPUT_STACK);
+ // each buffered character is budgeted sizeof(char_t) bytes in `buffer` plus 4 bytes in `scratch`.
+ enum
+ {
+ bufcapacitybytes =
+ #ifdef PUGIXML_MEMORY_OUTPUT_STACK
+ PUGIXML_MEMORY_OUTPUT_STACK
+ #else
+ 10240
+ #endif
+ ,
+ bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
+ };
+
+ // pending output, in char_t units
+ char_t buffer[bufcapacity];
+
+ // conversion target; all views alias the same storage
+ union
+ {
+ uint8_t data_u8[4 * bufcapacity];
+ uint16_t data_u16[2 * bufcapacity];
+ uint32_t data_u32[bufcapacity];
+ char_t data_char[bufcapacity];
+ } scratch;
+
+ xml_writer& writer; // destination of all output
+ size_t bufsize; // number of characters currently held in `buffer`
+ xml_encoding encoding; // target encoding resolved in the constructor
+ };
+
+ // Writes the null-terminated string `s`, replacing every character selected by `type`
+ // with an entity: &amp; &lt; &gt; &quot;, or a two-digit decimal character reference
+ // for any other selected character (asserted to be below 32).
+ PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
+ {
+ 	while (*s)
+ 	{
+ 		const char_t* prev = s;
+
+ 		// While *s is a usual symbol
+ 		// (`ss` is not declared here; presumably it is the cursor expression supplied by the macro)
+ 		PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
+
+ 		// emit the run of ordinary characters in one go
+ 		writer.write_buffer(prev, static_cast<size_t>(s - prev));
+
+ 		switch (*s)
+ 		{
+ 			case 0: break;
+ 			case '&':
+ 				writer.write('&', 'a', 'm', 'p', ';');
+ 				++s;
+ 				break;
+ 			case '<':
+ 				writer.write('&', 'l', 't', ';');
+ 				++s;
+ 				break;
+ 			case '>':
+ 				writer.write('&', 'g', 't', ';');
+ 				++s;
+ 				break;
+ 			case '"':
+ 				writer.write('&', 'q', 'u', 'o', 't', ';');
+ 				++s;
+ 				break;
+ 			default: // s is not a usual symbol
+ 			{
+ 				unsigned int ch = static_cast<unsigned int>(*s++);
+ 				assert(ch < 32);
+
+ 				// ch < 32, so two decimal digits always suffice
+ 				writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
+ 			}
+ 		}
+ 	}
+ }
+
+ // Writes `s` verbatim when format_no_escapes is set in `flags`; otherwise writes it
+ // through text_output_escaped() using the character class `type`.
+ PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
+ {
+ 	const bool verbatim = (flags & format_no_escapes) != 0;
+
+ 	if (!verbatim)
+ 	{
+ 		text_output_escaped(writer, s, type);
+ 		return;
+ 	}
+
+ 	writer.write_string(s);
+ }
+
+ // Writes `s` as one or more CDATA sections. Whenever the text contains "]]>", the
+ // current section is closed after "]]" and a new one is opened starting at ">", so the
+ // terminator never appears inside a section. An empty string yields one empty section.
+ PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
+ {
+ 	do
+ 	{
+ 		// "<![CDATA[" is nine characters; write() takes at most six, hence two calls
+ 		writer.write('<', '!', '[', 'C', 'D');
+ 		writer.write('A', 'T', 'A', '[');
+
+ 		const char_t* prev = s;
+
+ 		// look for ]]> sequence - we can't output it as is since it terminates CDATA
+ 		while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
+
+ 		// skip ]] if we stopped at ]]>, > will go to the next CDATA section
+ 		if (*s) s += 2;
+
+ 		writer.write_buffer(prev, static_cast<size_t>(s - prev));
+
+ 		writer.write(']', ']', '>');
+ 	}
+ 	while (*s);
+ }
+
+ // Writes the indent string `depth` times. Indents of 1 to 4 characters go through the
+ // fixed-arity write() overloads; any other length uses write_buffer().
+ PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
+ {
+ 	for (unsigned int level = 0; level < depth; ++level)
+ 	{
+ 		switch (indent_length)
+ 		{
+ 		case 1:
+ 			writer.write(indent[0]);
+ 			break;
+
+ 		case 2:
+ 			writer.write(indent[0], indent[1]);
+ 			break;
+
+ 		case 3:
+ 			writer.write(indent[0], indent[1], indent[2]);
+ 			break;
+
+ 		case 4:
+ 			writer.write(indent[0], indent[1], indent[2], indent[3]);
+ 			break;
+
+ 		default:
+ 			writer.write_buffer(indent, indent_length);
+ 			break;
+ 		}
+ 	}
+ }
+
+ // Writes `s` as an XML comment. A '-' that is followed by another '-' or by the end of
+ // the string is emitted as "- " so the body never contains "--" or ends with '-'.
+ PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
+ {
+ 	writer.write('<', '!', '-', '-');
+
+ 	while (*s)
+ 	{
+ 		const char_t* prev = s;
+
+ 		// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
+ 		while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
+
+ 		writer.write_buffer(prev, static_cast<size_t>(s - prev));
+
+ 		if (*s)
+ 		{
+ 			assert(*s == '-');
+
+ 			// separate the dash from whatever follows with a space
+ 			writer.write('-', ' ');
+ 			++s;
+ 		}
+ 	}
+
+ 	writer.write('-', '-', '>');
+ }
+
+ // Writes every attribute of `node` as ` name="value"`. Attributes without a name are
+ // written as ":anonymous"; attributes without a value get an empty quoted string.
+ PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+ 	xml_attribute_struct* attr = node->first_attribute;
+
+ 	while (attr)
+ 	{
+ 		const char_t* attr_name = attr->name ? attr->name : PUGIXML_TEXT(":anonymous");
+
+ 		writer.write(' ');
+ 		writer.write_string(attr_name);
+ 		writer.write('=', '"');
+
+ 		if (attr->value)
+ 		{
+ 			text_output(writer, attr->value, ctx_special_attr, flags);
+ 		}
+
+ 		writer.write('"');
+
+ 		attr = attr->next_attribute;
+ 	}
+ }
+
+ // Writes the opening tag of an element, including its attributes.
+ // Returns true when the caller still has to output the children and the closing tag.
+ // Returns false when the element was written completely: either it has no children, or
+ // (non-raw mode only) its single PCDATA/CDATA child was written inline.
+ PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+ 	const char_t* name = node->name;
+ 	if (!name) name = PUGIXML_TEXT(":anonymous");
+
+ 	writer.write('<');
+ 	writer.write_string(name);
+
+ 	if (node->first_attribute)
+ 		node_output_attributes(writer, node, flags);
+
+ 	xml_node_struct* first = node->first_child;
+
+ 	// raw mode: no newlines, no inlining of text children
+ 	if (flags & format_raw)
+ 	{
+ 		if (first)
+ 		{
+ 			writer.write('>');
+ 			return true;
+ 		}
+
+ 		writer.write(' ', '/', '>');
+ 		return false;
+ 	}
+
+ 	// empty element
+ 	if (!first)
+ 	{
+ 		writer.write(' ', '/', '>', '\n');
+ 		return false;
+ 	}
+
+ 	const bool lone_text_child = !first->next_sibling && (PUGI__NODETYPE(first) == node_pcdata || PUGI__NODETYPE(first) == node_cdata);
+
+ 	if (!lone_text_child)
+ 	{
+ 		writer.write('>', '\n');
+ 		return true;
+ 	}
+
+ 	// single text child: keep <name>text</name> on one line
+ 	writer.write('>');
+
+ 	const char_t* text = first->value ? first->value : PUGIXML_TEXT("");
+
+ 	if (PUGI__NODETYPE(first) == node_pcdata)
+ 		text_output(writer, text, ctx_special_pcdata, flags);
+ 	else
+ 		text_output_cdata(writer, text);
+
+ 	writer.write('<', '/');
+ 	writer.write_string(name);
+ 	writer.write('>', '\n');
+
+ 	return false;
+ }
+
+ // Writes the closing tag of an element, followed by a newline unless format_raw is set.
+ // A missing name is written as ":anonymous".
+ PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+ 	const char_t* name = node->name;
+ 	if (!name) name = PUGIXML_TEXT(":anonymous");
+
+ 	writer.write('<', '/');
+ 	writer.write_string(name);
+
+ 	const bool raw = (flags & format_raw) != 0;
+
+ 	if (!raw)
+ 		writer.write('>', '\n');
+ 	else
+ 		writer.write('>');
+ }
+
+ // Writes a node that is neither an element nor a document: PCDATA, CDATA, comment,
+ // processing instruction, declaration or doctype. Each of these is followed by a newline
+ // unless format_raw is set. Any other node type asserts and writes nothing.
+ PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
+ {
+ 	const char_t* anonymous = PUGIXML_TEXT(":anonymous");
+ 	const bool want_newline = (flags & format_raw) == 0;
+
+ 	switch (PUGI__NODETYPE(node))
+ 	{
+ 	case node_pcdata:
+ 		text_output(writer, node->value ? node->value : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
+ 		break;
+
+ 	case node_cdata:
+ 		text_output_cdata(writer, node->value ? node->value : PUGIXML_TEXT(""));
+ 		break;
+
+ 	case node_comment:
+ 		node_output_comment(writer, node->value ? node->value : PUGIXML_TEXT(""));
+ 		break;
+
+ 	case node_pi:
+ 		writer.write('<', '?');
+ 		writer.write_string(node->name ? node->name : anonymous);
+
+ 		if (node->value)
+ 		{
+ 			writer.write(' ');
+ 			writer.write_string(node->value);
+ 		}
+
+ 		writer.write('?', '>');
+ 		break;
+
+ 	case node_declaration:
+ 		writer.write('<', '?');
+ 		writer.write_string(node->name ? node->name : anonymous);
+ 		node_output_attributes(writer, node, flags);
+ 		writer.write('?', '>');
+ 		break;
+
+ 	case node_doctype:
+ 		// "<!DOCTYPE" is nine characters, so it takes two write() calls
+ 		writer.write('<', '!', 'D', 'O', 'C');
+ 		writer.write('T', 'Y', 'P', 'E');
+
+ 		if (node->value)
+ 		{
+ 			writer.write(' ');
+ 			writer.write_string(node->value);
+ 		}
+
+ 		writer.write('>');
+ 		break;
+
+ 	default:
+ 		assert(!"Invalid node type");
+ 		return; // unknown types produce no output, not even the trailing newline
+ 	}
+
+ 	if (want_newline) writer.write('\n');
+ }
+
+ // Serializes the subtree rooted at `root` without recursion, walking it through the
+ // first_child / next_sibling / parent links.
+ // `indent` is only used when format_indent is set and format_raw is not; `depth` is the
+ // indentation level of `root` and is adjusted while descending into / leaving elements.
+ PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
+ {
+ // zero length disables indentation entirely
+ size_t indent_length = ((flags & (format_indent | format_raw)) == format_indent) ? strlength(indent) : 0;
+
+ xml_node_struct* node = root;
+
+ do
+ {
+ assert(node);
+
+ // begin writing current node
+ if (indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ if (PUGI__NODETYPE(node) == node_element)
+ {
+ // true means the element has children that still need to be written
+ if (node_output_start(writer, node, flags))
+ {
+ node = node->first_child;
+ depth++;
+ continue;
+ }
+ }
+ else if (PUGI__NODETYPE(node) == node_document)
+ {
+ // the document node itself produces no output and does not increase depth
+ if (node->first_child)
+ {
+ node = node->first_child;
+ continue;
+ }
+ }
+ else
+ {
+ node_output_simple(writer, node, flags);
+ }
+
+ // continue to the next node
+ // (climb towards root until a node with a next sibling is found)
+ while (node != root)
+ {
+ if (node->next_sibling)
+ {
+ node = node->next_sibling;
+ break;
+ }
+
+ node = node->parent;
+
+ // write closing node
+ if (PUGI__NODETYPE(node) == node_element)
+ {
+ depth--;
+
+ if (indent_length)
+ text_output_indent(writer, indent, indent_length, depth);
+
+ node_output_end(writer, node, flags);
+ }
+ }
+ }
+ while (node != root);
+ }
+
+ // Returns true if a declaration node occurs among the children of `node` before the
+ // first element child; returns false otherwise.
+ PUGI__FN bool has_declaration(xml_node_struct* node)
+ {
+ 	xml_node_struct* cur = node->first_child;
+
+ 	while (cur)
+ 	{
+ 		switch (PUGI__NODETYPE(cur))
+ 		{
+ 		case node_declaration:
+ 			return true;
+
+ 		case node_element:
+ 			// the scan stops at the first element
+ 			return false;
+
+ 		default:
+ 			break;
+ 		}
+
+ 		cur = cur->next_sibling;
+ 	}
+
+ 	return false;
+ }
+
+ // Returns true if `attr` is one of the attributes in `node`'s attribute list.
+ PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
+ {
+ 	xml_attribute_struct* cur = node->first_attribute;
+
+ 	// walk the list until we hit `attr` or run off the end
+ 	while (cur && cur != attr)
+ 		cur = cur->next_attribute;
+
+ 	return cur != 0;
+ }
+
+ // Attributes may only be attached to element and declaration nodes.
+ PUGI__FN bool allow_insert_attribute(xml_node_type parent)
+ {
+ 	switch (parent)
+ 	{
+ 	case node_element:
+ 	case node_declaration:
+ 		return true;
+
+ 	default:
+ 		return false;
+ 	}
+ }
+
+ // Returns whether a node of type `child` may be inserted under a node of type `parent`:
+ // the parent must be a document or an element, the child may be neither a document nor
+ // a null node, and declaration/doctype children are only accepted under a document.
+ PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
+ {
+ 	const bool parent_is_document = (parent == node_document);
+ 	const bool parent_ok = parent_is_document || parent == node_element;
+ 	const bool child_ok = (child != node_document) && (child != node_null);
+ 	const bool document_only_child = (child == node_declaration) || (child == node_doctype);
+
+ 	return parent_ok && child_ok && (parent_is_document || !document_only_child);
+ }
+
+ // Returns whether `child` may be moved under `parent`: the node types must be
+ // compatible, both nodes must share the same root, and `parent` must not be `child`
+ // itself or one of its descendants.
+ PUGI__FN bool allow_move(xml_node parent, xml_node child)
+ {
+ 	// type compatibility
+ 	const bool types_ok = allow_insert_child(parent.type(), child.type());
+ 	if (!types_ok) return false;
+
+ 	// moving between documents is not allowed
+ 	if (parent.root() != child.root()) return false;
+
+ 	// walk from the new parent up to the root; meeting `child` on the way would create a cycle
+ 	for (xml_node ancestor = parent; ancestor; ancestor = ancestor.parent())
+ 	{
+ 		if (ancestor == child) return false;
+ 	}
+
+ 	return true;
+ }
+
+ // Copies the string `source` into the empty slot `dest`.
+ // If `alloc` is non-null and the source string is not flagged by `header_mask`, the
+ // pointer is shared instead of copied and both headers are marked as having shared
+ // contents; otherwise the string is copied via strcpy_insitu(). A null `source` is a no-op.
+ PUGI__FN void node_copy_string(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char_t* source, uintptr_t& source_header, xml_allocator* alloc)
+ {
+ 	assert(!dest && (header & header_mask) == 0);
+
+ 	if (!source) return;
+
+ 	const bool can_share = alloc && (source_header & header_mask) == 0;
+
+ 	if (!can_share)
+ 	{
+ 		strcpy_insitu(dest, header, header_mask, source);
+ 		return;
+ 	}
+
+ 	dest = source;
+
+ 	// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
+ 	header |= xml_memory_page_contents_shared_mask;
+ 	source_header |= xml_memory_page_contents_shared_mask;
+ }
+
+ // Copies name, value and all attributes of `sn` into `dn`. `shared_alloc` is passed
+ // through to node_copy_string(). Attributes that cannot be created are skipped.
+ PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
+ {
+ 	node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
+ 	node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
+
+ 	xml_attribute_struct* src_attr = sn->first_attribute;
+
+ 	while (src_attr)
+ 	{
+ 		xml_attribute_struct* dst_attr = append_new_attribute(dn, get_allocator(dn));
+
+ 		if (dst_attr)
+ 		{
+ 			node_copy_string(dst_attr->name, dst_attr->header, xml_memory_page_name_allocated_mask, src_attr->name, src_attr->header, shared_alloc);
+ 			node_copy_string(dst_attr->value, dst_attr->header, xml_memory_page_value_allocated_mask, src_attr->value, src_attr->header, shared_alloc);
+ 		}
+
+ 		src_attr = src_attr->next_attribute;
+ 	}
+ }
+
+ // Copies the contents of `sn` and its whole subtree into `dn` without recursion.
+ // The source tree is walked with `sit` while `dit` tracks the destination node that
+ // new copies are appended to.
+ PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
+ {
+ xml_allocator& alloc = get_allocator(dn);
+ // non-null only when source and destination use the same allocator object
+ xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
+
+ node_copy_contents(dn, sn, shared_alloc);
+
+ xml_node_struct* dit = dn;
+ xml_node_struct* sit = sn->first_child;
+
+ while (sit && sit != sn)
+ {
+ // dn itself is not copied if it is encountered inside the source subtree
+ if (sit != dn)
+ {
+ xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
+
+ // a failed append skips this node together with its children
+ if (copy)
+ {
+ node_copy_contents(copy, sit, shared_alloc);
+
+ if (sit->first_child)
+ {
+ // descend: further copies are appended under the node just created
+ dit = copy;
+ sit = sit->first_child;
+ continue;
+ }
+ }
+ }
+
+ // continue to the next node
+ // (source and destination cursors move up to their parents together)
+ do
+ {
+ if (sit->next_sibling)
+ {
+ sit = sit->next_sibling;
+ break;
+ }
+
+ sit = sit->parent;
+ dit = dit->parent;
+ }
+ while (sit != sn);
+ }
+ }
+
+ // Returns true for PCDATA and CDATA nodes.
+ inline bool is_text_node(xml_node_struct* node)
+ {
+ 	switch (PUGI__NODETYPE(node))
+ 	{
+ 	case node_pcdata:
+ 	case node_cdata:
+ 		return true;
+
+ 	default:
+ 		return false;
+ 	}
+ }
+
+ // get value with conversion functions
+ // Returns 16 if `value`, after leading ct_space characters and an optional '-', starts
+ // with "0x" or "0X"; returns 10 otherwise.
+ PUGI__FN int get_integer_base(const char_t* value)
+ {
+ 	const char_t* cursor = value;
+
+ 	// skip leading whitespace
+ 	for (; PUGI__IS_CHARTYPE(*cursor, ct_space); ++cursor)
+ 	{
+ 	}
+
+ 	// skip a single minus sign
+ 	if (*cursor == '-')
+ 		++cursor;
+
+ 	const bool hex_prefix = cursor[0] == '0' && (cursor[1] == 'x' || cursor[1] == 'X');
+
+ 	return hex_prefix ? 16 : 10;
+ }
+
+ // Parses `value` as a signed integer, in base 10 or 16 as chosen by get_integer_base().
+ // Returns `def` when `value` is null; the long result is narrowed to int.
+ PUGI__FN int get_value_int(const char_t* value, int def)
+ {
+ 	if (!value) return def;
+
+ 	int base = get_integer_base(value);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ 	return static_cast<int>(wcstol(value, 0, base));
+ #else
+ 	return static_cast<int>(strtol(value, 0, base));
+ #endif
+ }
+
+ // Parses `value` as an unsigned integer, in base 10 or 16 as chosen by get_integer_base().
+ // Returns `def` when `value` is null; the unsigned long result is narrowed to unsigned int.
+ PUGI__FN unsigned int get_value_uint(const char_t* value, unsigned int def)
+ {
+ 	if (!value) return def;
+
+ 	int base = get_integer_base(value);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ 	return static_cast<unsigned int>(wcstoul(value, 0, base));
+ #else
+ 	return static_cast<unsigned int>(strtoul(value, 0, base));
+ #endif
+ }
+
+ // Parses `value` as a double via wcstod/strtod; returns `def` when `value` is null.
+ PUGI__FN double get_value_double(const char_t* value, double def)
+ {
+ 	if (value)
+ 	{
+ #ifdef PUGIXML_WCHAR_MODE
+ 		return wcstod(value, 0);
+ #else
+ 		return strtod(value, 0);
+ #endif
+ 	}
+
+ 	return def;
+ }
+
+ // Parses `value` as a floating-point number via wcstod/strtod and narrows it to float.
+ // Returns `def` when `value` is null.
+ PUGI__FN float get_value_float(const char_t* value, float def)
+ {
+ 	if (!value) return def;
+
+ #ifdef PUGIXML_WCHAR_MODE
+ 	return static_cast<float>(wcstod(value, 0));
+ #else
+ 	return static_cast<float>(strtod(value, 0));
+ #endif
+ }
+
+ // Interprets `value` as a boolean by looking at its first character only:
+ // '1', 't', 'T', 'y' and 'Y' mean true, anything else (including an empty string) false.
+ // Returns `def` when `value` is null.
+ PUGI__FN bool get_value_bool(const char_t* value, bool def)
+ {
+ 	if (!value) return def;
+
+ 	switch (*value)
+ 	{
+ 	case '1': // 1*
+ 	case 't': // t* (true)
+ 	case 'T': // T* (True)
+ 	case 'y': // y* (yes)
+ 	case 'Y': // Y* (YES)
+ 		return true;
+
+ 	default:
+ 		return false;
+ 	}
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Parses `value` as a long long, in base 10 or 16 as chosen by get_integer_base().
+ // Returns `def` when `value` is null. Uses the _*toi64 functions when
+ // PUGI__MSVC_CRT_VERSION is defined and *toll otherwise.
+ PUGI__FN long long get_value_llong(const char_t* value, long long def)
+ {
+ 	if (value)
+ 	{
+ 		const int radix = get_integer_base(value);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ 	#ifdef PUGI__MSVC_CRT_VERSION
+ 		return _wcstoi64(value, 0, radix);
+ 	#else
+ 		return wcstoll(value, 0, radix);
+ 	#endif
+ #else
+ 	#ifdef PUGI__MSVC_CRT_VERSION
+ 		return _strtoi64(value, 0, radix);
+ 	#else
+ 		return strtoll(value, 0, radix);
+ 	#endif
+ #endif
+ 	}
+
+ 	return def;
+ }
+
+ // Parses `value` as an unsigned long long, in base 10 or 16 as chosen by
+ // get_integer_base(). Returns `def` when `value` is null. Uses the _*toui64 functions
+ // when PUGI__MSVC_CRT_VERSION is defined and *toull otherwise.
+ PUGI__FN unsigned long long get_value_ullong(const char_t* value, unsigned long long def)
+ {
+ 	if (value)
+ 	{
+ 		const int radix = get_integer_base(value);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ 	#ifdef PUGI__MSVC_CRT_VERSION
+ 		return _wcstoui64(value, 0, radix);
+ 	#else
+ 		return wcstoull(value, 0, radix);
+ 	#endif
+ #else
+ 	#ifdef PUGI__MSVC_CRT_VERSION
+ 		return _strtoui64(value, 0, radix);
+ 	#else
+ 		return strtoull(value, 0, radix);
+ 	#endif
+ #endif
+ 	}
+
+ 	return def;
+ }
+#endif
+
+ // set value with conversion functions
+ // Stores the narrow text in `buf` into `dest` through strcpy_insitu(), widening it with
+ // impl::widen_ascii() first when PUGIXML_WCHAR_MODE is defined.
+ // Returns the result of strcpy_insitu().
+ PUGI__FN bool set_value_buffer(char_t*& dest, uintptr_t& header, uintptr_t header_mask, char (&buf)[128])
+ {
+ #ifdef PUGIXML_WCHAR_MODE
+ 	char_t wide[128];
+ 	impl::widen_ascii(wide, buf);
+
+ 	const char_t* text = wide;
+ #else
+ 	const char_t* text = buf;
+ #endif
+
+ 	return strcpy_insitu(dest, header, header_mask, text);
+ }
+
+ // Formats a signed int with "%d" and stores the text via set_value_buffer().
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, int value)
+ {
+ 	char text[128];
+
+ 	sprintf(text, "%d", value);
+
+ 	const bool stored = set_value_buffer(dest, header, header_mask, text);
+ 	return stored;
+ }
+
+ // Formats an unsigned int with "%u" and stores the text via set_value_buffer().
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned int value)
+ {
+ 	char text[128];
+
+ 	sprintf(text, "%u", value);
+
+ 	const bool stored = set_value_buffer(dest, header, header_mask, text);
+ 	return stored;
+ }
+
+ // Formats a double with "%g" and stores the text via set_value_buffer().
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, double value)
+ {
+ 	char text[128];
+
+ 	sprintf(text, "%g", value);
+
+ 	const bool stored = set_value_buffer(dest, header, header_mask, text);
+ 	return stored;
+ }
+
+ // Stores the literal "true" or "false" via strcpy_insitu().
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, bool value)
+ {
+ 	const char_t* text = value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false");
+
+ 	return strcpy_insitu(dest, header, header_mask, text);
+ }
+
+#ifdef PUGIXML_HAS_LONG_LONG
+ // Formats a long long with "%lld" and stores the text via set_value_buffer().
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, long long value)
+ {
+ 	char text[128];
+
+ 	sprintf(text, "%lld", value);
+
+ 	const bool stored = set_value_buffer(dest, header, header_mask, text);
+ 	return stored;
+ }
+
+ // Formats an unsigned long long with "%llu" and stores the text via set_value_buffer().
+ PUGI__FN bool set_value_convert(char_t*& dest, uintptr_t& header, uintptr_t header_mask, unsigned long long value)
+ {
+ 	char text[128];
+
+ 	sprintf(text, "%llu", value);
+
+ 	const bool stored = set_value_buffer(dest, header, header_mask, text);
+ 	return stored;
+ }
+#endif
+
+ // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
+ PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
+ {
+ #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
+ // there are 64-bit versions of fseek/ftell, let's use them
+ typedef __int64 length_type;
+
+ _fseeki64(file, 0, SEEK_END);
+ length_type length = _ftelli64(file);
+ _fseeki64(file, 0, SEEK_SET);
+ #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
+ // there are 64-bit versions of fseek/ftell, let's use them
+ typedef off64_t length_type;
+
+ fseeko64(file, 0, SEEK_END);
+ length_type length = ftello64(file);
+ fseeko64(file, 0, SEEK_SET);
+ #else
+ // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
+ typedef long length_type;
+
+ fseek(file, 0, SEEK_END);
+ length_type length = ftell(file);
+ fseek(file, 0, SEEK_SET);
+ #endif
+
+ // check for I/O errors
+ if (length < 0) return status_io_error;
+
+ // check for overflow
+ size_t result = static_cast