天堂之门技术

天堂之门(Heaven's Gate)是一种在32位WoW64进程中执行64位代码、并直接调用64位Win32 API函数的技术。它可以作为一种软件保护手段,防止静态分析以及跨进程API Hook,也可以绕过沙箱对Win32 API调用的检测(因为大多数沙箱只hook 32位API)。

原理

Windows提供了WoW64(Windows 32-bit on Windows 64-bit)机制,使32位程序可以在64位系统上运行;它是64位系统为32位应用创建的模拟环境。

WoW64调用过程:

  1. 首先调用32位dll中的API
  2. 32位dll中的API再调用相应的模拟层dll,例如32位ntdll.dll会调用wow64cpu.dll中的X86SwitchTo64BitMode,将CPU切换到64位模式
  3. wow64.dll将32位转化为64位系统调用
  4. 调用64位的对应API
  5. 切换到内核态(Ring0)执行系统调用

天堂之门即绕过WoW64的机制,手动切换到64位模式下并执行64位代码,流程:

  1. 依次执行push 0x33、push <64位代码地址>、retf,将cs寄存器设置为0x33,切换到64位模式
  2. gs:[0x60]获取64位PEB
  3. 从64位PEB中读取64位ntdll的基址
  4. 遍历ntdll64导出表,读取API地址
  5. 构造64位系统调用

其中为了加载其它API,还需要获取LoadLibrary和GetProcAddress的地址

  1. 遍历ntdll64导出表,获取LdrLoadDll的地址(ntdll中负责加载DLL的函数,下文代码用它代替LoadLibrary来加载第一个模块)
  2. 加载kernel32.dll
  3. 遍历kernel32.dll导出表,得到GetProcAddress地址

这样就可以调用所有API了

代码

汇编代码使用keystone编译

import keystone.keystone as ks

# Assembly source for the 32-bit half of the mode switch: push the 64-bit
# code-segment selector, push a placeholder target address, then far-return.
code = '''
push 0x33
push 0x12345678
retf
'''

# Assemble as 32-bit x86; keystone returns the encoded bytes plus the number
# of statements it processed (the count is not needed here).
kst = ks.Ks(ks.KS_ARCH_X86, ks.KS_MODE_32)
encoding, _statement_count = kst.asm(code)

# Echo the source, then emit every byte as an upper-case hex literal followed
# by ", " so the line can be pasted straight into a C array initializer.
print(code)
print(''.join(f'0x{byte:X}, ' for byte in encoding), end='')

C代码使用VS编译,目标平台选择x86(32位),配置设置为Release并关闭优化

并且运行库要设置为静态链接的多线程版本(/MT)

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <Windows.h>


/*
 * Copy `sz` bytes from the 64-bit address `src` to the 64-bit address `dst`.
 *
 * The copy is performed by a small machine-code stub that far-returns into
 * the 64-bit code segment (CS = 0x33), runs `rep movsb` with full 64-bit
 * pointers, and far-returns back to the 32-bit segment (CS = 0x23). This
 * lets a 32-bit caller read memory it cannot address with 32-bit pointers.
 *
 * The stub lives in one shared executable buffer that is re-patched with the
 * arguments on every call, so this function is not safe to call from several
 * threads at once.
 */
void memcpy64(uint64_t dst,uint64_t src,uint64_t sz) {
    static uint8_t code[] = {
        /*	[bits 32]
            push 0x33
            push _next_x64_code
            retf    ; far return: pops EIP, then CS
            ; CS = 0x33 makes the CPU decode 64-bit code, CS = 0x23 32-bit code
        */
        0x6A, 0x33, 0x68, 0x78, 0x56, 0x34, 0x12, 0xCB,
        /*	[bits 64]
            push rsi
            push rdi
            mov rsi, src
            mov rdi, dst
            mov rcx, sz
            rep movsb
            pop rdi     ; restore in reverse push order
            pop rsi
        */
        0x56, 0x57,
        0x48, 0xBE, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,
        0x48, 0xBF, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,
        0x48, 0xB9, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,
        0xF3, 0xA4,
        /* The previous encoding (0x5E, 0x5F) popped RSI first and therefore
           handed ESI/EDI back to the 32-bit caller swapped. */
        0x5F, 0x5E,
        /*	[bits 64]
            push 0x23   ; selector of the 32-bit code segment
            push _next_x86_code
            retfq       ; far return with 64-bit operand size
        */
        0x6A, 0x23, 0x68, 0x78, 0x56, 0x34, 0x12, 0x48, 0xCB,
        /*	[bits 32]
            ret
        */
        0xC3
    };
    /* Address of the executable copy of the stub; allocated on first use. */
    static uint32_t ptr = 0;
    if (!ptr) {
        ptr = (uint32_t)VirtualAlloc(NULL, sizeof(code), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
        for (size_t i = 0; i < sizeof(code); i++) ((uint8_t*)ptr)[i] = code[i];
    }
    /* Patch the placeholder immediates with this call's values. */
    *(uint32_t*)(ptr + 3) = ptr + 8;    /* first 64-bit instruction */
    *(uint64_t*)(ptr + 12) = src;       /* mov rsi, imm64 */
    *(uint64_t*)(ptr + 22) = dst;       /* mov rdi, imm64 */
    *(uint64_t*)(ptr + 32) = sz;        /* mov rcx, imm64 */
    *(uint32_t*)(ptr + 47) = ptr + 53;  /* trailing 32-bit ret */
    ((void(*)())ptr)();
}

/*
 * Store the address of the process's 64-bit PEB into the 8 bytes that
 * `peb64` points to.
 *
 * A machine-code stub switches to the 64-bit code segment, loads gs:[0x60]
 * and writes the value through the 32-bit output pointer before switching
 * back to 32-bit mode.
 *
 * The output pointer is carried in ECX. The earlier encoding used ESI, which
 * 32-bit callers expect to be preserved across a call, and never restored it;
 * ECX is a scratch register, so nothing needs to be saved. Both encodings
 * have the same length, so the patch offsets are unchanged.
 *
 * The stub buffer is shared and re-patched per call: not thread-safe.
 */
void GetPEB64(void* peb64) {
    static uint8_t code[] = {
        /*	[bits 32]
            mov ecx, peb64
            push 0x33
            push _next_x64_code
            retf
        */
        0xB9, 0x78, 0x56, 0x34, 0x12, 0x6A, 0x33, 0x68, 0x78, 0x56, 0x34, 0x12, 0xCB,
        /*	[bits 64]
            mov rax, gs:[0x60]  ; in 64-bit mode gs:[0x30] is the TEB, gs:[0x60] the PEB
            mov [ecx], rax      ; 0x67 prefix: 32-bit address, 64-bit store
        */
        0x65, 0x48, 0xA1, 0x60, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x67, 0x48, 0x89, 0x1,
        /*	[bits 64]
            push 0x23
            push _next_x86_code
            retfq
        */
        0x6A, 0x23, 0x68, 0x78, 0x56, 0x34, 0x12, 0x48, 0xCB,
        /*	[bits 32]
            ret
        */
        0xC3
    };

    /* Address of the executable copy of the stub; allocated on first use. */
    static uint32_t ptr = 0;
    if (!ptr) {
        ptr = (uint32_t)VirtualAlloc(NULL, sizeof(code), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
        for (size_t i = 0; i < sizeof(code); i++) ((PBYTE)ptr)[i] = code[i];
    }
    *(uint32_t*)(ptr + 1) = (uint32_t)peb64;  /* mov ecx, imm32 */
    *(uint32_t*)(ptr + 8) = ptr + 13;         /* first 64-bit instruction */
    *(uint32_t*)(ptr + 31) = ptr + 37;        /* trailing 32-bit ret */
    ((void(*)())ptr)();
}

uint64_t GetModuleHandle64(const WCHAR* moduleName) {
    uint64_t peb64;
    /*	nt!_PEB_LDR_DATA
       +0x000 Length           : Uint4B
       +0x004 Initialized      : UChar
       +0x008 SsHandle         : Ptr64 Void
       +0x010 InLoadOrderModuleList : _LIST_ENTRY
    */
    uint64_t ldrData;
    /*
        ptr to InLoadOrderModuleList
    */
    uint64_t head;
    /*
        typedef struct _LDR_MODULE {
          +0x000 LIST_ENTRY              InLoadOrderModuleList;
          +0x010 LIST_ENTRY              InMemoryOrderModuleList;
          +0x020 LIST_ENTRY              InInitializationOrderModuleList;
          +0x030 PVOID                   BaseAddress;
          +0x038 PVOID                   EntryPoint;
          +0x040 ULONG                   SizeOfImage;
          +0x048 UNICODE_STRING          FullDllName;
          +0x058 UNICODE_STRING          BaseDllName;
          ...
        } LDR_MODULE, *PLDR_MODULE;
    */
    uint64_t pNode;
    GetPEB64(&peb64);
    memcpy64((uint64_t)&ldrData, peb64 + 0x18, 8); // PEB+0x18为LDR地址
    head = ldrData + 0x10; // LDR+0x10为InLoadOrderModuleList地址
    memcpy64((uint64_t)&pNode, head, 8);
    while (pNode != head) { // 遍历InLoadOrderModuleList
        uint64_t buffer;
        memcpy64((uint64_t)(unsigned)(&buffer), pNode + 96, 8);	// tmp = pNode->BaseDllName->Buffer
        if (buffer) {
            WCHAR curModuleName[32] = { 0 };
            memcpy64((uint64_t)curModuleName, buffer, 60); // 获取模块名
            if (!lstrcmpiW(moduleName, curModuleName)) { // 比对moduleName
                uint64_t base;
                memcpy64((uint64_t)&base, pNode + 48, 8);
                return base;
            }
        }
        memcpy64((uint64_t)&pNode, pNode, 8);	// pNode = pNode->Flink
    }
    return NULL;
}

uint64_t _GetProcAddress64(uint64_t hModule, const char* func) {
    IMAGE_DOS_HEADER dos;
    memcpy64((uint64_t)&dos, hModule, sizeof(dos));
    IMAGE_NT_HEADERS64 nt;
    memcpy64((uint64_t)&nt, hModule + dos.e_lfanew, sizeof(nt));
    IMAGE_EXPORT_DIRECTORY expo;
    memcpy64((uint64_t)&expo, hModule + nt.OptionalHeader.DataDirectory[0].VirtualAddress, sizeof(expo));

    for (uint64_t i = 0; i < expo.NumberOfNames; i++) {
        DWORD pName;
        memcpy64((uint64_t)&pName, hModule + expo.AddressOfNames + (4 * i), 4);
        char name[64] = { 0 };
        memcpy64((uint64_t)name, hModule + pName, 64);
        if (!lstrcmpA(name, func)) {
            WORD ord;
            memcpy64((uint64_t)&ord, hModule + expo.AddressOfNameOrdinals + (2 * i), 2);
            uint32_t addr;
            memcpy64((uint64_t)&addr, hModule + expo.AddressOfFunctions + (4 * ord), 4);
            return hModule + addr;
        }
    }
    return NULL;
}

/*
 * Call the 64-bit function at address `proc` from 32-bit code using the
 * Microsoft x64 calling convention, and return its RAX value.
 *
 * proc: 64-bit address of the function to call.
 * argc: number of arguments that follow.
 * ...:  the arguments. EVERY one must be passed as a uint64_t (cast
 *       explicitly at the call site); the stub reads them as a contiguous
 *       array of 8-byte slots starting right after `argc`.
 *
 * Changes compared with the previous stub:
 *  - The 64-bit stack pointer is saved in RDI (preserved by the callee)
 *    before arguments are pushed and restored after the call. Previously the
 *    pushed stack arguments and the 32-byte shadow space were never released,
 *    so the following `pop rdi; pop rsi` restored garbage into registers the
 *    32-bit caller expects to be preserved.
 *  - The stack is aligned to 16 bytes at the call instruction (ESP is
 *    aligned to 16 and one padding slot is pushed when the number of stack
 *    arguments is odd), as the x64 convention requires.
 *
 * The stub buffer is shared and re-patched per call, so this function is
 * neither thread-safe nor reentrant.
 */
uint64_t X64Call(uint64_t proc, uint32_t argc, ...) {
    /* Byte offsets of the placeholder immediates inside `code`. */
    enum {
        OFF_NEXT_X64 = 9,
        OFF_ARGS = 21,
        OFF_ARGC = 31,
        OFF_PROC = 80,
        OFF_RET = 99,
        OFF_NEXT_X86 = 115,
        X64_ENTRY = 14,
        X86_REENTRY = 121
    };
    /* The variadic arguments sit directly above `argc` on the 32-bit stack. */
    uint64_t* args = (uint64_t*)(&argc + 1);
    uint64_t ret = 0;
    static uint8_t code[] = {
        /* [bits 32] */
        0x53,                                   /*   0: push ebx */
        0x89, 0xE3,                             /*   1: mov ebx, esp        ; EBX survives the 64-bit call */
        0x83, 0xE4, 0xF0,                       /*   3: and esp, 0xFFFFFFF0 ; 16-byte alignment */
        0x6A, 0x33,                             /*   6: push 0x33 */
        0x68, 0x78, 0x56, 0x34, 0x12,           /*   8: push _next_x64_code */
        0xCB,                                   /*  13: retf */
        /* [bits 64] */
        0x56,                                   /*  14: push rsi */
        0x57,                                   /*  15: push rdi */
        0x48, 0x89, 0xE7,                       /*  16: mov rdi, rsp        ; remember RSP for cleanup */
        0x48, 0xBE, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,  /*  19: mov rsi, args */
        0x48, 0xB8, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,  /*  29: mov rax, argc */
        0x48, 0x83, 0xF8, 0x04,                 /*  39: cmp rax, 4 */
        0x7E, 0x12,                             /*  43: jle args_end        ; no stack arguments */
        0xA8, 0x01,                             /*  45: test al, 1 */
        0x74, 0x01,                             /*  47: jz args_loop        ; even count: already aligned */
        0x50,                                   /*  49: push rax            ; padding slot */
        0xFF, 0x74, 0xC6, 0xF8,                 /*  50: args_loop: push qword [rsi+rax*8-8] */
        0x48, 0xFF, 0xC8,                       /*  54: dec rax */
        0x48, 0x83, 0xF8, 0x04,                 /*  57: cmp rax, 4 */
        0x7F, 0xF3,                             /*  61: jg args_loop */
        0x48, 0x8B, 0x0E,                       /*  63: args_end: mov rcx, [rsi] */
        0x48, 0x8B, 0x56, 0x08,                 /*  66: mov rdx, [rsi+8] */
        0x4C, 0x8B, 0x46, 0x10,                 /*  70: mov r8, [rsi+16] */
        0x4C, 0x8B, 0x4E, 0x18,                 /*  74: mov r9, [rsi+24] */
        0x48, 0xB8, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,  /*  78: mov rax, proc */
        0x48, 0x83, 0xEC, 0x20,                 /*  88: sub rsp, 32         ; shadow space */
        0xFF, 0xD0,                             /*  92: call rax */
        0x48, 0x89, 0xFC,                       /*  94: mov rsp, rdi        ; drop shadow space + stack args */
        0x48, 0xBF, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,  /*  97: mov rdi, &ret */
        0x48, 0x89, 0x07,                       /* 107: mov [rdi], rax */
        0x5F,                                   /* 110: pop rdi */
        0x5E,                                   /* 111: pop rsi */
        0x6A, 0x23,                             /* 112: push 0x23 */
        0x68, 0x78, 0x56, 0x34, 0x12,           /* 114: push _next_x86_code */
        0x48, 0xCB,                             /* 119: retfq */
        /* [bits 32] */
        0x89, 0xDC,                             /* 121: mov esp, ebx */
        0x5B,                                   /* 123: pop ebx */
        0xC3                                    /* 124: ret */
    };

    /* Address of the executable copy of the stub; allocated on first use. */
    static uint32_t ptr = 0;
    if (!ptr) {
        ptr = (uint32_t)VirtualAlloc(NULL, sizeof(code), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
        for (size_t i = 0; i < sizeof(code); i++) ((PBYTE)ptr)[i] = code[i];
    }
    /* Local addresses are widened through uintptr_t so they are always
       zero-extended into the 64-bit immediates. */
    *(uint32_t*)(ptr + OFF_NEXT_X64) = ptr + X64_ENTRY;
    *(uint64_t*)(ptr + OFF_ARGS) = (uint64_t)(uintptr_t)args;
    *(uint64_t*)(ptr + OFF_ARGC) = (uint64_t)argc;
    *(uint64_t*)(ptr + OFF_PROC) = proc;
    *(uint64_t*)(ptr + OFF_RET) = (uint64_t)(uintptr_t)&ret;
    *(uint32_t*)(ptr + OFF_NEXT_X86) = ptr + X86_REENTRY;
    ((void(*)())ptr)();
    return ret;
}

/*
 * Build a heap-allocated 64-bit UNICODE_STRING descriptor for `str`.
 *
 * Layout of the returned block:
 *   +0   uint16  Length         (bytes, terminator excluded)
 *   +2   uint16  MaximumLength  (bytes, terminator included)
 *   +4   4 bytes of padding     (zeroed)
 *   +8   uint64  Buffer         (address of the characters at +16)
 *   +16  UTF-16 code units, zero-terminated
 *
 * Each input byte is widened to one 16-bit unit as an unsigned value, so
 * bytes >= 0x80 map to 0x0080..0x00FF instead of being sign-extended to
 * 0xFFxx. No real multi-byte decoding is performed.
 *
 * Returns NULL when `str` is NULL, when the text does not fit into the
 * 16-bit length fields, or when allocation fails. The caller owns the block
 * and releases it with free().
 */
char* MakeUTFStr(const char* str) {
    size_t srcLen;
    uint32_t len;
    char* out;
    uint16_t* outstr;

    if (!str) return NULL;
    srcLen = strlen(str);
    /* (len + 1) * 2 must be representable in the 16-bit MaximumLength. */
    if (srcLen > 0x7FFE) return NULL;
    len = (uint32_t)srcLen;

    out = (char*)malloc(16 + (len + 1) * 2);
    if (!out) return NULL;

    *(uint16_t*)(out) = (uint16_t)(len * 2);            /* Length */
    *(uint16_t*)(out + 2) = (uint16_t)((len + 1) * 2);  /* MaximumLength */
    *(uint32_t*)(out + 4) = 0;                          /* padding */

    outstr = (uint16_t*)(out + 16);
    /* i <= len copies the terminator as well. */
    for (uint32_t i = 0; i <= len; i++) outstr[i] = (uint16_t)(unsigned char)str[i];

    /* Zero-extend the local address into the 64-bit Buffer field. */
    *(uint64_t*)(out + 8) = (uint64_t)(uintptr_t)(out + 16);
    return out;
}

/*
 * Return the base address of the 64-bit kernel32.dll, loading it on first
 * use by calling the 64-bit ntdll export LdrLoadDll.
 *
 * The result is cached after the first successful load. Returns 0 when the
 * 64-bit ntdll or LdrLoadDll cannot be located, the name descriptor cannot
 * be built, or LdrLoadDll reports a failure status.
 */
uint64_t GetKernel32() {
    static uint64_t kernel32 = 0;
    uint64_t ntdll;
    uint64_t LdrLoadDll;
    uint64_t status;
    char* str;

    if (kernel32) return kernel32;

    /* Calling through a zero address would fault, so stop at the first
       lookup that fails. */
    ntdll = GetModuleHandle64(L"ntdll.dll");
    if (!ntdll) return 0;
    LdrLoadDll = _GetProcAddress64(ntdll, "LdrLoadDll");
    if (!LdrLoadDll) return 0;

    str = MakeUTFStr("kernel32.dll");
    if (!str) return 0;

    /* LdrLoadDll(SearchPath, DllCharacteristics, DllName, BaseAddress);
       the module base is written straight into the cache variable. */
    status = X64Call(LdrLoadDll, 4, (uint64_t)0, (uint64_t)0,
                     (uint64_t)(uintptr_t)str, (uint64_t)(uintptr_t)(&kernel32));
    /* The descriptor is only needed for the duration of the call. */
    free(str);

    /* NTSTATUS is a signed 32-bit value; negative means failure. */
    if ((int32_t)status < 0) kernel32 = 0;
    return kernel32;
}
/*
 * Resolve `func` in the 64-bit module `module` by calling the 64-bit
 * kernel32 GetProcAddress.
 *
 * The address of the 64-bit GetProcAddress is looked up once and cached.
 * Returns the 64-bit address reported by GetProcAddress, or 0 when the
 * 64-bit GetProcAddress itself could not be located.
 */
uint64_t GetProcAddress64(uint64_t module, const char* func) {
    static uint64_t K32GetProcAddress = 0;
    if (!K32GetProcAddress) K32GetProcAddress = _GetProcAddress64(GetKernel32(), "GetProcAddress");
    /* Never transfer control to address 0 when the lookup failed. */
    if (!K32GetProcAddress) return 0;

    /* Zero-extend the 32-bit string pointer into the 64-bit argument. */
    return X64Call(K32GetProcAddress, 2, module, (uint64_t)(uintptr_t)func);
}
/*
 * Load the DLL called `name` into the 64-bit half of the process by calling
 * the 64-bit kernel32 LoadLibraryA.
 *
 * The address of the 64-bit LoadLibraryA is looked up once and cached.
 * Returns the value LoadLibraryA returns (the 64-bit module base), or 0 when
 * the 64-bit LoadLibraryA could not be located.
 */
uint64_t LoadLibrary64(const char* name) {
    /* Named differently from the Win32 function so the local does not
       shadow the declaration from <Windows.h>. */
    static uint64_t pLoadLibraryA64 = 0;
    if (!pLoadLibraryA64) pLoadLibraryA64 = GetProcAddress64(GetKernel32(), "LoadLibraryA");
    /* Never transfer control to address 0 when the lookup failed. */
    if (!pLoadLibraryA64) return 0;

    /* Zero-extend the 32-bit string pointer into the 64-bit argument. */
    return X64Call(pLoadLibraryA64, 1, (uint64_t)(uintptr_t)name);
}
/*
 * Demo: load the 64-bit user32.dll and show a message box through the 64-bit
 * MessageBoxA, all from 32-bit code.
 *
 * Prints a diagnostic and returns without calling anything when user32 or
 * MessageBoxA cannot be resolved.
 */
void Test() {
    /* LoadLibrary64 loads the 64-bit kernel32 itself on first use, so no
       separate GetKernel32() call is needed here. */
    uint64_t user32 = LoadLibrary64("user32.dll");
    uint64_t MessageBox64;

    if (!user32) {
        printf("failed to load the 64-bit user32.dll\n");
        return;
    }
    MessageBox64 = GetProcAddress64(user32, "MessageBoxA");
    if (!MessageBox64) {
        printf("failed to resolve the 64-bit MessageBoxA\n");
        return;
    }
    /* MessageBoxA(hWnd, lpText, lpCaption, uType); every argument is passed
       as a zero-extended 64-bit value. */
    X64Call(MessageBox64, 4, (uint64_t)0, (uint64_t)(uintptr_t)"Wowowowowow",
            (uint64_t)(uintptr_t)"Wowowowowow", (uint64_t)0);
}

/* Program entry point: run the Heaven's Gate demo and exit successfully. */
int main() {
    Test();
    return 0;
}

需要双击打开才可以看到效果

参考

http://blog.bluesadi.cn:4000/2021/11/06/%E5%A4%A9%E5%A0%82%E4%B9%8B%E9%97%A8-Heaven-s-Gate-C%E8%AF%AD%E8%A8%80%E5%AE%9E%E7%8E%B0/

https://rce.co/knockin-on-heavens-gate-dynamic-processor-mode-switching/

https://s.itho.me/ccms_slides/2021/5/13/a23144e3-4313-4b9b-89c2-985fb3832b4e.pdf

https://www.freebuf.com/articles/web/209983.html

https://wbenny.github.io/2018/11/04/wow64-internals.html