diff --git a/detours/creatwth.cpp b/detours/creatwth.cpp index e2eb1476..d7bc08ac 100644 --- a/detours/creatwth.cpp +++ b/detours/creatwth.cpp @@ -7,34 +7,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // -#if _MSC_VER >= 1900 -#pragma warning(push) -#pragma warning(disable:4091) // empty typedef -#endif -#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1 -#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1 -#include -#include -#pragma warning(push) -#if _MSC_VER > 1400 -#pragma warning(disable:6102 6103) // /analyze warnings -#endif -#include -#pragma warning(pop) - // #define DETOUR_DEBUG 1 #define DETOURS_INTERNAL - #include "detours.h" +#include #if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH #error detours.h version mismatch #endif -#if _MSC_VER >= 1900 -#pragma warning(pop) -#endif - #define IMPORT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT] #define BOUND_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT] #define CLR_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR] diff --git a/detours/detours.cpp b/detours/detours.cpp index 08e4c672..b8f46884 100644 --- a/detours/detours.cpp +++ b/detours/detours.cpp @@ -7,33 +7,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // -#pragma warning(disable:4068) // unknown pragma (suppress) - -#if _MSC_VER >= 1900 -#pragma warning(push) -#pragma warning(disable:4091) // empty typedef -#endif - -#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1 -#include - -#if (_MSC_VER < 1299) -#pragma warning(disable: 4710) -#endif //#define DETOUR_DEBUG 1 #define DETOURS_INTERNAL - #include "detours.h" #if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH #error detours.h version mismatch #endif -#if _MSC_VER >= 1900 -#pragma warning(pop) -#endif - #define NOTHROW ////////////////////////////////////////////////////////////////////////////// @@ -880,7 +862,8 @@ struct _DETOUR_TRAMPOLINE { // An ARM64 instruction is 4 bytes long. // - // The overwrite is always 2 instructions plus a literal, so 16 bytes, 4 instructions. + // The overwrite is always composed of 3 instructions (12 bytes) which perform an indirect jump + // using _DETOUR_TRAMPOLINE::pbDetour as the address holding the target location. // // Copied instructions can expand. // @@ -915,7 +898,7 @@ struct _DETOUR_TRAMPOLINE C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 184); enum { - SIZE_OF_JMP = 16 + SIZE_OF_JMP = 12 }; inline ULONG fetch_opcode(PBYTE pbCode) @@ -929,6 +912,79 @@ inline void write_opcode(PBYTE &pbCode, ULONG Opcode) pbCode += 4; } +struct ARM64_INDIRECT_JMP { + struct { + ULONG Rd : 5; + ULONG immhi : 19; + ULONG iop : 5; + ULONG immlo : 2; + ULONG op : 1; + } ardp; + + struct { + ULONG Rt : 5; + ULONG Rn : 5; + ULONG imm : 12; + ULONG opc : 2; + ULONG iop1 : 2; + ULONG V : 1; + ULONG iop2 : 3; + ULONG size : 2; + } ldr; + + ULONG br; +}; + +#pragma warning(push) +#pragma warning(disable:4201) + +union ARM64_INDIRECT_IMM { + struct { + ULONG64 pad : 12; + ULONG64 adrp_immlo : 2; + ULONG64 adrp_immhi : 19; + }; + + LONG64 value; +}; + +#pragma warning(pop) + +PBYTE detour_gen_jmp_indirect(BYTE *pbCode, ULONG64 *pbJmpVal) +{ + // adrp x17, [jmpval] + // ldr x17, [x17, jmpval] + // br x17 + + struct ARM64_INDIRECT_JMP *pIndJmp; + union ARM64_INDIRECT_IMM jmpIndAddr; + + jmpIndAddr.value = (((LONG64)pbJmpVal) & 0xFFFFFFFFFFFFF000) - + (((LONG64)pbCode) & 0xFFFFFFFFFFFFF000); + + pIndJmp = (struct ARM64_INDIRECT_JMP *)pbCode; + pbCode = (BYTE *)(pIndJmp + 1); + + pIndJmp->ardp.Rd = 17; + pIndJmp->ardp.immhi = jmpIndAddr.adrp_immhi; + pIndJmp->ardp.iop = 0x10; + pIndJmp->ardp.immlo = jmpIndAddr.adrp_immlo; + pIndJmp->ardp.op = 1; + + pIndJmp->ldr.Rt = 17; + pIndJmp->ldr.Rn = 17; + pIndJmp->ldr.imm = (((ULONG64)pbJmpVal) & 0xFFF) / 8; + pIndJmp->ldr.opc = 1; + pIndJmp->ldr.iop1 = 1; + pIndJmp->ldr.V = 0; + pIndJmp->ldr.iop2 = 7; + pIndJmp->ldr.size = 3; + + pIndJmp->br = 0xD61F0220; + + return pbCode; +} + PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal) { PBYTE pbLiteral; @@ -995,7 +1051,7 @@ inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals) the bottom 12 bits cleared to zero, and then writes the result to a general-purpose register. This permits the calculation of the address at a 4KB aligned memory region. In conjunction with an ADD (immediate) instruction, or a Load/Store instruction with a 12-bit immediate offset, this allows for the calculation of, or access to, any address - within ±4GB of the current PC. + within +/- 4GB of the current PC. PC-rel. addressing This section describes the encoding of the PC-rel. addressing instruction class. The encodings in this section are @@ -1062,7 +1118,10 @@ inline void detour_find_jmp_bounds(PBYTE pbCode, PDETOUR_TRAMPOLINE *ppLower, PDETOUR_TRAMPOLINE *ppUpper) { - // We have to place trampolines within +/- 2GB of code. + // The encoding used by detour_gen_jmp_indirect actually enables a + // displacement of +/- 4GiB. In the future, this could be changed to + // reflect that. For now, just reuse the x86 logic which is plenty. + ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode); ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode); DETOUR_TRACE(("[%p..%p..%p]\n", lo, pbCode, hi)); @@ -1250,6 +1309,65 @@ static PVOID detour_alloc_region_from_hi(PBYTE pbLo, PBYTE pbHi) return NULL; } +static PVOID detour_alloc_trampoline_allocate_new(PBYTE pbTarget, + PDETOUR_TRAMPOLINE pLo, + PDETOUR_TRAMPOLINE pHi) +{ + PVOID pbTry = NULL; + + // NB: We must always also start the search at an offset from pbTarget + // in order to maintain ASLR entropy. + +#if defined(DETOURS_64BIT) + // Try looking 1GB below or lower. + if (pbTry == NULL && pbTarget > (PBYTE)0x40000000) { + pbTry = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget - 0x40000000); + } + // Try looking 1GB above or higher. + if (pbTry == NULL && pbTarget < (PBYTE)0xffffffff40000000) { + pbTry = detour_alloc_region_from_lo(pbTarget + 0x40000000, (PBYTE)pHi); + } + // Try looking 1GB below or higher. + if (pbTry == NULL && pbTarget > (PBYTE)0x40000000) { + pbTry = detour_alloc_region_from_lo(pbTarget - 0x40000000, pbTarget); + } + // Try looking 1GB above or lower. + if (pbTry == NULL && pbTarget < (PBYTE)0xffffffff40000000) { + pbTry = detour_alloc_region_from_hi(pbTarget, pbTarget + 0x40000000); + } +#endif + + // Try anything below. + if (pbTry == NULL) { + pbTry = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget); + } + // try anything above. + if (pbTry == NULL) { + pbTry = detour_alloc_region_from_lo(pbTarget, (PBYTE)pHi); + } + + return pbTry; +} + +PVOID WINAPI DetourAllocateRegionWithinJumpBounds(_In_ LPCVOID pbTarget, + _Out_ PDWORD pcbAllocatedSize) +{ + PDETOUR_TRAMPOLINE pLo; + PDETOUR_TRAMPOLINE pHi; + detour_find_jmp_bounds((PBYTE)pbTarget, &pLo, &pHi); + + PVOID pbNewlyAllocated = + detour_alloc_trampoline_allocate_new((PBYTE)pbTarget, pLo, pHi); + if (pbNewlyAllocated == NULL) { + DETOUR_TRACE(("Couldn't find available memory region!\n")); + *pcbAllocatedSize = 0; + return NULL; + } + + *pcbAllocatedSize = DETOUR_REGION_SIZE; + return pbNewlyAllocated; +} + static PDETOUR_TRAMPOLINE detour_alloc_trampoline(PBYTE pbTarget) { // We have to place trampolines within +/- 2GB of target. @@ -1294,41 +1412,10 @@ static PDETOUR_TRAMPOLINE detour_alloc_trampoline(PBYTE pbTarget) // Round pbTarget down to 64KB block. pbTarget = pbTarget - (PtrToUlong(pbTarget) & 0xffff); - PVOID pbTry = NULL; - - // NB: We must always also start the search at an offset from pbTarget - // in order to maintain ASLR entropy. - -#if defined(DETOURS_64BIT) - // Try looking 1GB below or lower. - if (pbTry == NULL && pbTarget > (PBYTE)0x40000000) { - pbTry = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget - 0x40000000); - } - // Try looking 1GB above or higher. - if (pbTry == NULL && pbTarget < (PBYTE)0xffffffff40000000) { - pbTry = detour_alloc_region_from_lo(pbTarget + 0x40000000, (PBYTE)pHi); - } - // Try looking 1GB below or higher. - if (pbTry == NULL && pbTarget > (PBYTE)0x40000000) { - pbTry = detour_alloc_region_from_lo(pbTarget - 0x40000000, pbTarget); - } - // Try looking 1GB above or lower. - if (pbTry == NULL && pbTarget < (PBYTE)0xffffffff40000000) { - pbTry = detour_alloc_region_from_hi(pbTarget, pbTarget + 0x40000000); - } -#endif - - // Try anything below. - if (pbTry == NULL) { - pbTry = detour_alloc_region_from_hi((PBYTE)pLo, pbTarget); - } - // try anything above. - if (pbTry == NULL) { - pbTry = detour_alloc_region_from_lo(pbTarget, (PBYTE)pHi); - } - - if (pbTry != NULL) { - s_pRegion = (DETOUR_REGION*)pbTry; + PVOID pbNewlyAllocated = + detour_alloc_trampoline_allocate_new(pbTarget, pLo, pHi); + if (pbNewlyAllocated != NULL) { + s_pRegion = (DETOUR_REGION*)pbNewlyAllocated; s_pRegion->dwSignature = DETOUR_REGION_SIGNATURE; s_pRegion->pFree = NULL; s_pRegion->pNext = s_pRegions; @@ -1655,7 +1742,7 @@ LONG WINAPI DetourTransactionCommitEx(_Out_opt_ PVOID **pppFailedPointer) #endif // DETOURS_ARM #ifdef DETOURS_ARM64 - PBYTE pbCode = detour_gen_jmp_immediate(o->pbTarget, NULL, o->pTrampoline->pbDetour); + PBYTE pbCode = detour_gen_jmp_indirect(o->pbTarget, (ULONG64*)&(o->pTrampoline->pbDetour)); pbCode = detour_gen_brk(pbCode, o->pTrampoline->pbRemain); *o->ppbPointer = o->pTrampoline->rbCode; UNREFERENCED_PARAMETER(pbCode); diff --git a/detours/detours.h b/detours/detours.h index faff4fe4..97595a2c 100644 --- a/detours/detours.h +++ b/detours/detours.h @@ -16,6 +16,34 @@ ////////////////////////////////////////////////////////////////////////////// // +#ifdef DETOURS_INTERNAL + +#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1 +#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1 + +#pragma warning(disable:4068) // unknown pragma (suppress) + +#if _MSC_VER >= 1900 +#pragma warning(push) +#pragma warning(disable:4091) // empty typedef +#endif + +#include +#if (_MSC_VER < 1310) +#else +#pragma warning(push) +#if _MSC_VER > 1400 +#pragma warning(disable:6102 6103) // /analyze warnings +#endif +#include +#pragma warning(pop) +#endif + +#endif // DETOURS_INTERNAL + +////////////////////////////////////////////////////////////////////////////// +// + #undef DETOURS_X64 #undef DETOURS_X86 #undef DETOURS_IA64 @@ -61,7 +89,12 @@ //#define DETOURS_OPTION_BITS 32 #endif -#define VER_DETOURS_BITS DETOUR_STRINGIFY(DETOURS_BITS) +/////////////////////////////////////////////////////////////// Helper Macros. +// +#define DETOURS_STRINGIFY_(x) #x +#define DETOURS_STRINGIFY(x) DETOURS_STRINGIFY_(x) + +#define VER_DETOURS_BITS DETOURS_STRINGIFY(DETOURS_BITS) ////////////////////////////////////////////////////////////////////////////// // @@ -387,7 +420,6 @@ typedef struct _DETOUR_EXE_RESTORE #ifdef IMAGE_NT_OPTIONAL_HDR64_MAGIC // some environments do not have this BYTE raw[sizeof(IMAGE_NT_HEADERS64) + sizeof(IMAGE_SECTION_HEADER) * 32]; - C_ASSERT(sizeof(IMAGE_NT_HEADERS64) == 0x108); #else BYTE raw[0x108 + sizeof(IMAGE_SECTION_HEADER) * 32]; #endif @@ -396,6 +428,10 @@ typedef struct _DETOUR_EXE_RESTORE } DETOUR_EXE_RESTORE, *PDETOUR_EXE_RESTORE; +#ifdef IMAGE_NT_OPTIONAL_HDR64_MAGIC +C_ASSERT(sizeof(IMAGE_NT_HEADERS64) == 0x108); +#endif + // The size can change, but assert for clarity due to the muddying #ifdefs. #ifdef _WIN64 C_ASSERT(sizeof(DETOUR_EXE_RESTORE) == 0x688); @@ -431,11 +467,6 @@ typedef struct _DETOUR_EXE_HELPER 0,\ } -/////////////////////////////////////////////////////////////// Helper Macros. -// -#define DETOURS_STRINGIFY(x) DETOURS_STRINGIFY_(x) -#define DETOURS_STRINGIFY_(x) #x - ///////////////////////////////////////////////////////////// Binary Typedefs. // typedef BOOL (CALLBACK *PF_DETOUR_BINARY_BYWAY_CALLBACK)( @@ -523,6 +554,8 @@ PVOID WINAPI DetourCopyInstruction(_In_opt_ PVOID pDst, _Out_opt_ LONG *plExtra); BOOL WINAPI DetourSetCodeModule(_In_ HMODULE hModule, _In_ BOOL fLimitReferencesToModule); +PVOID WINAPI DetourAllocateRegionWithinJumpBounds(_In_ LPCVOID pbTarget, + _Out_ PDWORD pcbAllocatedSize); ///////////////////////////////////////////////////// Loaded Binary Functions. // diff --git a/detours/detver.h b/detours/detver.h index f0aae9bd..3d4f5448 100644 --- a/detours/detver.h +++ b/detours/detver.h @@ -14,8 +14,8 @@ #include #else #ifndef DETOURS_STRINGIFY -#define DETOURS_STRINGIFY(x) DETOURS_STRINGIFY_(x) #define DETOURS_STRINGIFY_(x) #x +#define DETOURS_STRINGIFY(x) DETOURS_STRINGIFY_(x) #endif #define VER_FILEFLAGSMASK 0x3fL @@ -24,4 +24,4 @@ #define VER_FILETYPE 0x00000002L #define VER_FILESUBTYPE 0x00000000L #endif -#define VER_DETOURS_BITS DETOUR_STRINGIFY(DETOURS_BITS) +#define VER_DETOURS_BITS DETOURS_STRINGIFY(DETOURS_BITS) diff --git a/detours/disasm.cpp b/detours/disasm.cpp index f7896d46..ce666fb0 100644 --- a/detours/disasm.cpp +++ b/detours/disasm.cpp @@ -7,28 +7,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // -#if _MSC_VER >= 1900 -#pragma warning(push) -#pragma warning(disable:4091) // empty typedef -#endif - -#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1 -#include -#include - // #define DETOUR_DEBUG 1 #define DETOURS_INTERNAL - #include "detours.h" +#include #if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH #error detours.h version mismatch #endif -#if _MSC_VER >= 1900 -#pragma warning(pop) -#endif - #undef ASSERT #define ASSERT(x) @@ -260,6 +247,11 @@ class CDetourDis #define ENTRY_CopyFF ENTRY_DataIgnored &CDetourDis::CopyFF #define ENTRY_CopyVex2 ENTRY_DataIgnored &CDetourDis::CopyVex2 #define ENTRY_CopyVex3 ENTRY_DataIgnored &CDetourDis::CopyVex3 +#define ENTRY_CopyEvex ENTRY_DataIgnored &CDetourDis::CopyEvex // 62, 3 byte payload, then normal with implied prefixes like vex +#define ENTRY_CopyXop ENTRY_DataIgnored &CDetourDis::CopyXop // 0x8F ... POP /0 or AMD XOP +#define ENTRY_CopyBytesXop 5, 5, 4, 0, 0, &CDetourDis::CopyBytes // 0x8F xop1 xop2 opcode modrm +#define ENTRY_CopyBytesXop1 6, 6, 4, 0, 0, &CDetourDis::CopyBytes // 0x8F xop1 xop2 opcode modrm ... imm8 +#define ENTRY_CopyBytesXop4 9, 9, 4, 0, 0, &CDetourDis::CopyBytes // 0x8F xop1 xop2 opcode modrm ... imm32 #define ENTRY_Invalid ENTRY_DataIgnored &CDetourDis::Invalid #define ENTRY_End ENTRY_DataIgnored NULL @@ -289,6 +281,9 @@ class CDetourDis PBYTE CopyVex2(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); PBYTE CopyVex3(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); PBYTE CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p); + PBYTE CopyEvex(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); + PBYTE CopyXop(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc); protected: static const COPYENTRY s_rceCopyTable[257]; @@ -303,6 +298,7 @@ class CDetourDis BOOL m_bAddressOverride; BOOL m_bRaxOverride; // AMD64 only BOOL m_bVex; + BOOL m_bEvex; BOOL m_bF2; BOOL m_bF3; // x86 only BYTE m_nSegmentOverride; @@ -337,6 +333,7 @@ CDetourDis::CDetourDis(_Out_opt_ PBYTE *ppbTarget, _Out_opt_ LONG *plExtra) m_bF2 = FALSE; m_bF3 = FALSE; m_bVex = FALSE; + m_bEvex = FALSE; m_ppbTarget = ppbTarget ? ppbTarget : &m_pbScratchTarget; m_plExtra = plExtra ? plExtra : &m_lScratchExtra; @@ -368,8 +365,11 @@ PBYTE CDetourDis::CopyBytes(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) { UINT nBytesFixed; - ASSERT(!m_bVex || pEntry->nFlagBits == 0); - ASSERT(!m_bVex || pEntry->nFixedSize == pEntry->nFixedSize16); + if (m_bVex || m_bEvex) + { + ASSERT(pEntry->nFlagBits == 0); + ASSERT(pEntry->nFixedSize == pEntry->nFixedSize16); + } UINT const nModOffset = pEntry->nModOffset; UINT const nFlagBits = pEntry->nFlagBits; @@ -748,33 +748,42 @@ PBYTE CDetourDis::CopyFF(REFCOPYENTRY pEntry, PBYTE pbDst, PBYTE pbSrc) return pbOut; } -PBYTE CDetourDis::CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc) +PBYTE CDetourDis::CopyVexEvexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc, BYTE p) // m is first instead of last in the hopes of pbDst/pbSrc being // passed along efficiently in the registers they were already in. { static const COPYENTRY ceF38 = { 0x38, ENTRY_CopyBytes2Mod }; static const COPYENTRY ceF3A = { 0x3A, ENTRY_CopyBytes2Mod1 }; - static const COPYENTRY Invalid = { 0xC4, ENTRY_Invalid }; + static const COPYENTRY ceInvalid = { 0xC4, ENTRY_Invalid }; - m_bVex = TRUE; - REFCOPYENTRY pEntry; - switch (m) { - default: pEntry = &Invalid; break; - case 1: pEntry = &s_rceCopyTable0F[pbSrc[0]]; break; - case 2: pEntry = &ceF38; break; - case 3: pEntry = &ceF3A; break; - } - - switch (pbSrc[-1] & 3) { // p in last byte + switch (p & 3) { case 0: break; case 1: m_bOperandOverride = TRUE; break; case 2: m_bF3 = TRUE; break; case 3: m_bF2 = TRUE; break; } - return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); + REFCOPYENTRY pEntry; + + switch (m) { + default: return Invalid(&ceInvalid, pbDst, pbSrc); + case 1: pEntry = &s_rceCopyTable0F[pbSrc[0]]; + return (this->*pEntry->pfCopy)(pEntry, pbDst, pbSrc); + case 2: return CopyBytes(&ceF38, pbDst, pbSrc); + case 3: return CopyBytes(&ceF3A, pbDst, pbSrc); + } } +PBYTE CDetourDis::CopyVexCommon(BYTE m, PBYTE pbDst, PBYTE pbSrc) +// m is first instead of last in the hopes of pbDst/pbSrc being +// passed along efficiently in the registers they were already in. +{ + m_bVex = TRUE; + BYTE const p = (BYTE)(pbSrc[-1] & 3); // p in last byte + return CopyVexEvexCommon(m, pbDst, pbSrc, p); +} + + PBYTE CDetourDis::CopyVex3(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) // 3 byte VEX prefix 0xC4 { @@ -835,6 +844,78 @@ PBYTE CDetourDis::CopyVex2(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) return CopyVexCommon(1, pbDst + 2, pbSrc + 2); } +PBYTE CDetourDis::CopyEvex(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +// 62, 3 byte payload, x86 with implied prefixes like Vex +// for 32bit, mode 0xC0 else fallback to bound /r +{ + // NOTE: Intel and Wikipedia number these differently. + // Intel says 0-2, Wikipedia says 1-3. + + BYTE const p0 = pbSrc[1]; + +#ifdef DETOURS_X86 + const static COPYENTRY ceBound = { 0x62, ENTRY_CopyBytes2Mod }; + if ((p0 & 0xC0) != 0xC0) { + return CopyBytes(&ceBound, pbDst, pbSrc); + } +#endif + + static const COPYENTRY ceInvalid = { 0x62, ENTRY_Invalid }; + + if ((p0 & 0x0C) != 0) + return Invalid(&ceInvalid, pbDst, pbSrc); + + BYTE const p1 = pbSrc[2]; + + if ((p1 & 0x04) != 0x04) + return Invalid(&ceInvalid, pbDst, pbSrc); + + // Copy 4 byte prefix. + *(UNALIGNED ULONG *)pbDst = *(UNALIGNED ULONG*)pbSrc; + + m_bEvex = TRUE; + +#ifdef DETOURS_X64 + m_bRaxOverride |= !!(p1 & 0x80); // w +#endif + + return CopyVexEvexCommon(p0 & 3u, pbDst + 4, pbSrc + 4, p1 & 3u); +} + +PBYTE CDetourDis::CopyXop(REFCOPYENTRY, PBYTE pbDst, PBYTE pbSrc) +/* 3 byte AMD XOP prefix 0x8F +byte0: 0x8F +byte1: RXBmmmmm +byte2: WvvvvLpp +byte3: opcode +mmmmm >= 8, else pop +mmmmm only otherwise defined for 8, 9, A. +pp is like VEX but only instructions with 0 are defined +*/ +{ + const static COPYENTRY cePop = { 0x8F, ENTRY_CopyBytes2Mod }; + const static COPYENTRY ceXop = { 0x8F, ENTRY_CopyBytesXop }; + const static COPYENTRY ceXop1 = { 0x8F, ENTRY_CopyBytesXop1 }; + const static COPYENTRY ceXop4 = { 0x8F, ENTRY_CopyBytesXop4 }; + + BYTE const m = (BYTE)(pbSrc[1] & 0x1F); + ASSERT(m <= 10); + switch (m) + { + default: + return CopyBytes(&cePop, pbDst, pbSrc); + + case 8: // modrm with 8bit immediate + return CopyBytes(&ceXop1, pbDst, pbSrc); + + case 9: // modrm with no immediate + return CopyBytes(&ceXop, pbDst, pbSrc); + + case 10: // modrm with 32bit immediate + return CopyBytes(&ceXop4, pbDst, pbSrc); + } +} + ////////////////////////////////////////////////////////////////////////////// // PBYTE CDetourDis::s_pbModuleBeg = NULL; @@ -1030,11 +1111,11 @@ const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable[257] = #ifdef DETOURS_X64 { 0x60, ENTRY_Invalid }, // Invalid { 0x61, ENTRY_Invalid }, // Invalid - { 0x62, ENTRY_Invalid }, // Invalid (not yet implemented Intel EVEX support) + { 0x62, ENTRY_CopyEvex }, // EVEX / AVX512 #else { 0x60, ENTRY_CopyBytes1 }, // PUSHAD { 0x61, ENTRY_CopyBytes1 }, // POPAD - { 0x62, ENTRY_CopyBytes2Mod }, // BOUND /r + { 0x62, ENTRY_CopyEvex }, // BOUND /r and EVEX / AVX512 #endif { 0x63, ENTRY_CopyBytes2Mod }, // 32bit ARPL /r, 64bit MOVSXD { 0x64, ENTRY_CopyBytesSegment }, // FS prefix @@ -1084,7 +1165,7 @@ const CDetourDis::COPYENTRY CDetourDis::s_rceCopyTable[257] = { 0x8C, ENTRY_CopyBytes2Mod }, // MOV /r { 0x8D, ENTRY_CopyBytes2Mod }, // LEA /r { 0x8E, ENTRY_CopyBytes2Mod }, // MOV /r - { 0x8F, ENTRY_CopyBytes2Mod }, // POP /0 + { 0x8F, ENTRY_CopyXop }, // POP /0 or AMD XOP { 0x90, ENTRY_CopyBytes1 }, // NOP { 0x91, ENTRY_CopyBytes1 }, // XCHG { 0x92, ENTRY_CopyBytes1 }, // XCHG diff --git a/detours/image.cpp b/detours/image.cpp index 21ff501d..4fa31a73 100644 --- a/detours/image.cpp +++ b/detours/image.cpp @@ -9,39 +9,18 @@ // Used for for payloads, byways, and imports. // -#if _MSC_VER >= 1900 -#pragma warning(push) -#pragma warning(disable:4091) // empty typedef -#endif -#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1 -#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1 -#include -#if _MSC_VER >= 1310 -#pragma warning(push) -#if _MSC_VER > 1400 -#pragma warning(disable:6102 6103) // /analyze warnings -#endif -#include -#pragma warning(pop) -#endif - -#if (_MSC_VER < 1299) +#if _MSC_VER < 1299 #pragma warning(disable: 4710) #endif // #define DETOUR_DEBUG 1 #define DETOURS_INTERNAL - #include "detours.h" #if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH #error detours.h version mismatch #endif -#if _MSC_VER >= 1900 -#pragma warning(pop) -#endif - namespace Detour { ////////////////////////////////////////////////////////////////////////////// @@ -1714,17 +1693,13 @@ BOOL CImage::Write(HANDLE hFile) m_nNextFileAddr = Max(m_SectionHeaders[n].PointerToRawData + m_SectionHeaders[n].SizeOfRawData, m_nNextFileAddr); -#if 0 - m_nNextVirtAddr = Max(m_SectionHeaders[n].VirtualAddress + - m_SectionHeaders[n].Misc.VirtualSize, - m_nNextVirtAddr); -#else + // Old images have VirtualSize == 0 as a matter of course, e.g. NT 3.1. + // In which case, use SizeOfRawData instead. m_nNextVirtAddr = Max(m_SectionHeaders[n].VirtualAddress + (m_SectionHeaders[n].Misc.VirtualSize ? m_SectionHeaders[n].Misc.VirtualSize : SectionAlign(m_SectionHeaders[n].SizeOfRawData)), m_nNextVirtAddr); -#endif m_nExtraOffset = Max(m_nNextFileAddr, m_nExtraOffset); @@ -1857,7 +1832,7 @@ BOOL CImage::Write(HANDLE hFile) for (CImageImportFile *pImportFile = m_pImportFiles; pImportFile != NULL; pImportFile = pImportFile->m_pNextFile) { - ZeroMemory(piidDst, sizeof(piidDst)); + ZeroMemory(piidDst, sizeof(*piidDst)); nameTable.Allocate(pImportFile->m_pszName, (DWORD *)&piidDst->Name); piidDst->TimeDateStamp = 0; piidDst->ForwarderChain = pImportFile->m_nForwarderChain; @@ -1899,7 +1874,7 @@ BOOL CImage::Write(HANDLE hFile) } piidDst++; } - ZeroMemory(piidDst, sizeof(piidDst)); + ZeroMemory(piidDst, sizeof(*piidDst)); ////////////////////////////////////////////////////////////////////////// // diff --git a/detours/modules.cpp b/detours/modules.cpp index 34167308..ade78c97 100644 --- a/detours/modules.cpp +++ b/detours/modules.cpp @@ -9,27 +9,6 @@ // Module enumeration functions. // -#define _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS 1 - -#pragma warning(disable:4068) // unknown pragma (suppress) - -#if _MSC_VER >= 1900 -#pragma warning(push) -#pragma warning(disable:4091) // empty typedef -#endif - -#define _ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE 1 -#include -#if (_MSC_VER < 1310) -#else -#pragma warning(push) -#if _MSC_VER > 1400 -#pragma warning(disable:6102 6103) // /analyze warnings -#endif -#include -#pragma warning(pop) -#endif - // #define DETOUR_DEBUG 1 #define DETOURS_INTERNAL #include "detours.h" @@ -38,10 +17,6 @@ #error detours.h version mismatch #endif -#if _MSC_VER >= 1900 -#pragma warning(pop) -#endif - #define CLR_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR] #define IAT_DIRECTORY OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT] @@ -164,8 +139,8 @@ PDETOUR_SYM_INFO DetourLoadImageHlp(VOID) return pSymInfo; } -PVOID WINAPI DetourFindFunction(_In_ PCSTR pszModule, - _In_ PCSTR pszFunction) +PVOID WINAPI DetourFindFunction(_In_ LPCSTR pszModule, + _In_ LPCSTR pszFunction) { /////////////////////////////////////////////// First, try GetProcAddress. //