前言
一直想学fuzz,但是由于自己太懒,一直没开始,最近终于开始学了,一直认为学习一个东西最好的办法,就是阅读源码,只有理解了源码,才能一通百通,我将从
afl-gcc
开始,一步一步的分析AFL的源码。源码下载:
1
git clone https://github.com/google/AFL.git
afl-gcc
afl-gcc.c
的main函数如下:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37int main(int argc, char** argv) {
if (isatty(2) && !getenv("AFL_QUIET")) {
SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
} else be_quiet = 1;
if (argc < 2) {
SAYF("\n"
"This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
"for gcc or clang, letting you recompile third-party code with the required\n"
"runtime instrumentation. A common use pattern would be one of the following:\n\n"
" CC=%s/afl-gcc ./configure\n"
" CXX=%s/afl-g++ ./configure\n\n"
"You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
"Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
BIN_PATH, BIN_PATH);
exit(1);
}
find_as(argv[0]);
edit_params(argc, argv);
execvp(cc_params[0], (char**)cc_params);
FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);
return 0;
}我们需要关注的是
find_as
函数与edit_params
函数,首先来看find_as
函数,该函数用于寻找汇编器的位置:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50static void find_as(u8* argv0) {
u8 *afl_path = getenv("AFL_PATH");
u8 *slash, *tmp;
if (afl_path) {
tmp = alloc_printf("%s/as", afl_path);
if (!access(tmp, X_OK)) {
as_path = afl_path;
ck_free(tmp);
return;
}
ck_free(tmp);
}
slash = strrchr(argv0, '/');
if (slash) {
u8 *dir;
*slash = 0;
dir = ck_strdup(argv0);
*slash = '/';
tmp = alloc_printf("%s/afl-as", dir);
if (!access(tmp, X_OK)) {
as_path = dir;
ck_free(tmp);
return;
}
ck_free(tmp);
ck_free(dir);
}
if (!access(AFL_PATH "/as", X_OK)) {
as_path = AFL_PATH;
return;
}
FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH");
}该函数首先获取了一下
AFL_PATH
这个环境变量,如果存在,那么将会访问AFL_PATH/as
这个文件,如果能够访问,那么,AFL_PATH
的路径就会被赋值给as_path
,如果不存在,将会获取到路径中最后一次出现/
的位置,并把/
前的路径作为dir
,然后在该路径下寻找是否存在/afl-as
,如果存在,则把dir
的路径赋值给as_path
,如果以上两种办法都没有找到,就会报错。接下来我们来看edit_params函数,首先来看获取参数的部分:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73/* Copy argv to cc_params, making the necessary edits. */
static void edit_params(u32 argc, char** argv) {
u8 fortify_set = 0, asan_set = 0;
u8 *name;
u8 m32_set = 0;
cc_params = ck_alloc((argc + 128) * sizeof(u8*));
name = strrchr(argv[0], '/');
if (!name) name = argv[0]; else name++;
if (!strncmp(name, "afl-clang", 9)) {
clang_mode = 1;
setenv(CLANG_ENV_VAR, "1", 1);
if (!strcmp(name, "afl-clang++")) {
u8* alt_cxx = getenv("AFL_CXX");
cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
} else {
u8* alt_cc = getenv("AFL_CC");
cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
}
} else {
/* With GCJ and Eclipse installed, you can actually compile Java! The
instrumentation will work (amazingly). Alas, unhandled exceptions do
not call abort(), so afl-fuzz would need to be modified to equate
non-zero exit codes with crash conditions when working with Java
binaries. Meh. */
if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX");
else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ");
else cc_params[0] = getenv("AFL_CC");
if (!cc_params[0]) {
SAYF("\n" cLRD "[-] " cRST
"On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n"
" 'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n"
" set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n");
FATAL("AFL_CC or AFL_CXX required on MacOS X");
}
if (!strcmp(name, "afl-g++")) {
u8* alt_cxx = getenv("AFL_CXX");
cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
} else if (!strcmp(name, "afl-gcj")) {
u8* alt_cc = getenv("AFL_GCJ");
cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
} else {
u8* alt_cc = getenv("AFL_CC");
cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
}
}从函数名就可以看出,该函数主要是设置必要的参数,函数首先通过比较
strncmp
来比较name,从而确定调用afl-g++
、afl-gcc
、afl-clang
还是afl-clang++
来对程序进行编译,如果是clang*
,需要先将clang_mode
设置为1,然后再来判断是调用clang++
还是clang
,并查找环境变量,来决定最终对cc_params[0]
的赋值。接下来我们看为gcc添加参数的部分:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118while (--argc) {
u8* cur = *(++argv);
if (!strncmp(cur, "-B", 2)) {
if (!be_quiet) WARNF("-B is already set, overriding");
if (!cur[2] && argc > 1) { argc--; argv++; }
continue;
}
if (!strcmp(cur, "-integrated-as")) continue;
if (!strcmp(cur, "-pipe")) continue;
if (!strcmp(cur, "-m32")) m32_set = 1;
if (!strcmp(cur, "-fsanitize=address") ||
!strcmp(cur, "-fsanitize=memory")) asan_set = 1;
if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;
cc_params[cc_par_cnt++] = cur;
}
cc_params[cc_par_cnt++] = "-B";
cc_params[cc_par_cnt++] = as_path;
if (clang_mode)
cc_params[cc_par_cnt++] = "-no-integrated-as";
if (getenv("AFL_HARDEN")) {
cc_params[cc_par_cnt++] = "-fstack-protector-all";
if (!fortify_set)
cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";
}
if (asan_set) {
/* Pass this on to afl-as to adjust map density. */
setenv("AFL_USE_ASAN", "1", 1);
} else if (getenv("AFL_USE_ASAN")) {
if (getenv("AFL_USE_MSAN"))
FATAL("ASAN and MSAN are mutually exclusive");
if (getenv("AFL_HARDEN"))
FATAL("ASAN and AFL_HARDEN are mutually exclusive");
cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
cc_params[cc_par_cnt++] = "-fsanitize=address";
} else if (getenv("AFL_USE_MSAN")) {
if (getenv("AFL_USE_ASAN"))
FATAL("ASAN and MSAN are mutually exclusive");
if (getenv("AFL_HARDEN"))
FATAL("MSAN and AFL_HARDEN are mutually exclusive");
cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
cc_params[cc_par_cnt++] = "-fsanitize=memory";
}
if (!getenv("AFL_DONT_OPTIMIZE")) {
/* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
works OK. This has nothing to do with us, but let's avoid triggering
that bug. */
if (!clang_mode || !m32_set)
cc_params[cc_par_cnt++] = "-g";
cc_params[cc_par_cnt++] = "-g";
cc_params[cc_par_cnt++] = "-O3";
cc_params[cc_par_cnt++] = "-funroll-loops";
/* Two indicators that you're building for fuzzing; one of them is
AFL-specific, the other is shared with libfuzzer. */
cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";
}
if (getenv("AFL_NO_BUILTIN")) {
cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";
}
cc_params[cc_par_cnt] = NULL;
}程序会跳过
-B/-integrated-as/-pipe
这几个参数,然后判断是否存在-fsanitize=address
或-fsanitize=memory
参数,如果存在,则将ASAN_set
赋值为1,然后赋值cc_params[1]=cur
,然后接下来程序自己添加了参数-B as_path
,接下来前面设置了clang_mode=1
,将会添加参数-no-integrated-as
,接下来检查是否存在AFL_HARDEN
环境变量,存在的话,将会添加参数-fstack-protector-all
,如果存在该环境变量,还会检查fortify_set
是否为0,如果为0,将会添加参数-D_FORTIFY_SOURCE=2
,接下来会检查asan_set
是否为1,如果为1了,将会进一步检察环境变量,并加上相应参数,这里与前面差不多,就不再赘述了,当不存在环境变量AFL_DONT_OPTIMIZE
时,程序还会添加参数-g
,-03
,-funroll-loops
,-D__AFL_COMPILER=1
,-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1
,如果存在环境变量AFL_NO_BUILTIN
,程序将会添加参数-fno-builtin-strcmp
。最后程序会向数组中放入
\x00
来结束输入。接下来,当
find_as
函数与edit_params
函数执行完成以后,程序将会调用execve
函数,最终调用gcc
。afl-as
afl-as
函数,是对as
的包装,main函数实现如下:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80int main(int argc, char** argv) {
s32 pid;
u32 rand_seed;
int status;
u8* inst_ratio_str = getenv("AFL_INST_RATIO");
struct timeval tv;
struct timezone tz;
clang_mode = !!getenv(CLANG_ENV_VAR);
if (isatty(2) && !getenv("AFL_QUIET")) {
SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
} else be_quiet = 1;
if (argc < 2) {
SAYF("\n"
"This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
"executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
"don't want to run this program directly.\n\n"
"Rarely, when dealing with extremely complex projects, it may be advisable to\n"
"set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
"instrumenting every discovered branch.\n\n");
exit(1);
}
gettimeofday(&tv, &tz);
rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
srandom(rand_seed);
edit_params(argc, argv);
if (inst_ratio_str) {
if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
}
if (getenv(AS_LOOP_ENV_VAR))
FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
setenv(AS_LOOP_ENV_VAR, "1", 1);
/* When compiling with ASAN, we don't have a particularly elegant way to skip
ASAN-specific branches. But we can probabilistically compensate for
that... */
if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
sanitizer = 1;
inst_ratio /= 3;
}
if (!just_version) add_instrumentation();
if (!(pid = fork())) {
execvp(as_params[0], (char**)as_params);
FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
}
if (pid < 0) PFATAL("fork() failed");
if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
exit(WEXITSTATUS(status));
}我们首先来关注
edit_params
函数,函数的源码如下:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126static void edit_params(int argc, char** argv) {
u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
u32 i;
u8 use_clang_as = 0;
/* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
with the code generated by newer versions of clang that are hand-built
by the user. See the thread here: http://goo.gl/HBWDtn.
To work around this, when using clang and running without AFL_AS
specified, we will actually call 'clang -c' instead of 'as -q' to
compile the assembly file.
The tools aren't cmdline-compatible, but at least for now, we can
seemingly get away with this by making only very minor tweaks. Thanks
to Nico Weber for the idea. */
if (clang_mode && !afl_as) {
use_clang_as = 1;
afl_as = getenv("AFL_CC");
if (!afl_as) afl_as = getenv("AFL_CXX");
if (!afl_as) afl_as = "clang";
}
/* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
is not set. We need to check these non-standard variables to properly
handle the pass_thru logic later on. */
if (!tmp_dir) tmp_dir = getenv("TEMP");
if (!tmp_dir) tmp_dir = getenv("TMP");
if (!tmp_dir) tmp_dir = "/tmp";
as_params = ck_alloc((argc + 32) * sizeof(u8*));
as_params[0] = afl_as ? afl_as : (u8*)"as";
as_params[argc] = 0;
for (i = 1; i < argc - 1; i++) {
if (!strcmp(argv[i], "--64")) use_64bit = 1;
else if (!strcmp(argv[i], "--32")) use_64bit = 0;
/* The Apple case is a bit different... */
if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
else if (!strcmp(argv[i + 1], "i386"))
FATAL("Sorry, 32-bit Apple platforms are not supported.");
}
/* Strip options that set the preference for a particular upstream
assembler in Xcode. */
if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
continue;
as_params[as_par_cnt++] = argv[i];
}
/* When calling clang as the upstream assembler, append -c -x assembler
and hope for the best. */
if (use_clang_as) {
as_params[as_par_cnt++] = "-c";
as_params[as_par_cnt++] = "-x";
as_params[as_par_cnt++] = "assembler";
}
input_file = argv[argc - 1];
if (input_file[0] == '-') {
if (!strcmp(input_file + 1, "-version")) {
just_version = 1;
modified_file = input_file;
goto wrap_things_up;
}
if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
else input_file = NULL;
} else {
/* Check if this looks like a standard invocation as a part of an attempt
to compile a program, rather than using gcc on an ad-hoc .s file in
a format we may not understand. This works around an issue compiling
NSS. */
if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
strncmp(input_file, "/var/tmp/", 9) &&
strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
}
modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
(u32)time(NULL));
wrap_things_up:
as_params[as_par_cnt++] = modified_file;
as_params[as_par_cnt] = NULL;
}首先获取了环境变量
TMPDIR
是否存在,如果存在,则将tmp_dir
设置为该环境变量的值,如果不存在,将会依次获取TEMP
,TMP
环境变量,如果都不存在,则将tmp_dir
设置为/tmp
,然后判断afl_as
是否存在,如果存在,则将as_params[0]
设置为afl_as
,否则,设置为as
。然后通过判断参数
--64
是否存在,如果存在,则use_64bit
为真,否则为0
,并将as_params[as_par_cnt++]
的值,设置为argv[i]
。接下来,将
argv[argc-1]
的值,作为输入文件,接下来,依次比较input_file
与tmp_dir
,/var/tmp
,/tmp/
是否相等,如果相等,则将pass_thru
设置为1。并将modified_file
设置为tmp
目录下的.s
文件。然后进行
as_params[as_par_cnt++] = modified_file;
以及as_params[as_par_cnt] = NULL
。接下来我们来看另外一个函数
add_instrumentation
,该函数用于对文件的插桩,是重点函数,函数源码如下:1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246static void add_instrumentation(void) {
static u8 line[MAX_LINE];
FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0;
u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
skip_intel = 0, skip_app = 0, instrument_next = 0;
u8* colon_pos;
if (input_file) {
inf = fopen(input_file, "r");
if (!inf) PFATAL("Unable to read '%s'", input_file);
} else inf = stdin;
outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
outf = fdopen(outfd, "w");
if (!outf) PFATAL("fdopen() failed");
while (fgets(line, MAX_LINE, inf)) {
/* In some cases, we want to defer writing the instrumentation trampoline
until after all the labels, macros, comments, etc. If we're in this
mode, and if the line starts with a tab followed by a character, dump
the trampoline now. */
if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
instrument_next && line[0] == '\t' && isalpha(line[1])) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
instrument_next = 0;
ins_lines++;
}
/* Output the actual line, call it a day in pass-thru mode. */
fputs(line, outf);
if (pass_thru) continue;
/* All right, this is where the actual fun begins. For one, we only want to
instrument the .text section. So, let's keep track of that in processed
files - and let's set instr_ok accordingly. */
if (line[0] == '\t' && line[1] == '.') {
/* OpenBSD puts jump tables directly inline with the code, which is
a bit annoying. They use a specific format of p2align directives
around them, so we use that as a signal. */
if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
if (!strncmp(line + 2, "text\n", 5) ||
!strncmp(line + 2, "section\t.text", 13) ||
!strncmp(line + 2, "section\t__TEXT,__text", 21) ||
!strncmp(line + 2, "section __TEXT,__text", 21)) {
instr_ok = 1;
continue;
}
if (!strncmp(line + 2, "section\t", 8) ||
!strncmp(line + 2, "section ", 8) ||
!strncmp(line + 2, "bss\n", 4) ||
!strncmp(line + 2, "data\n", 5)) {
instr_ok = 0;
continue;
}
}
/* Detect off-flavor assembly (rare, happens in gdb). When this is
encountered, we set skip_csect until the opposite directive is
seen, and we do not instrument. */
if (strstr(line, ".code")) {
if (strstr(line, ".code32")) skip_csect = use_64bit;
if (strstr(line, ".code64")) skip_csect = !use_64bit;
}
/* Detect syntax changes, as could happen with hand-written assembly.
Skip Intel blocks, resume instrumentation when back to AT&T. */
if (strstr(line, ".intel_syntax")) skip_intel = 1;
if (strstr(line, ".att_syntax")) skip_intel = 0;
/* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
if (line[0] == '#' || line[1] == '#') {
if (strstr(line, "#APP")) skip_app = 1;
if (strstr(line, "#NO_APP")) skip_app = 0;
}
/* If we're in the right mood for instrumenting, check for function
names or conditional labels. This is a bit messy, but in essence,
we want to catch:
^main: - function entry point (always instrumented)
^.L0: - GCC branch label
^.LBB0_0: - clang branch label (but only in clang mode)
^\tjnz foo - conditional branches
...but not:
^# BB#0: - clang comments
^ # BB#0: - ditto
^.Ltmp0: - clang non-branch labels
^.LC0 - GCC non-branch labels
^.LBB0_0: - ditto (when in GCC mode)
^\tjmp foo - non-conditional jumps
Additionally, clang and GCC on MacOS X follow a different convention
with no leading dots on labels, hence the weird maze of #ifdefs
later on.
*/
if (skip_intel || skip_app || skip_csect || !instr_ok ||
line[0] == '#' || line[0] == ' ') continue;
/* Conditional branch instruction (jnz, etc). We append the instrumentation
right after the branch (to instrument the not-taken path) and at the
branch destination label (handled later on). */
if (line[0] == '\t') {
if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
R(MAP_SIZE));
ins_lines++;
}
continue;
}
/* Label of some sort. This may be a branch destination, but we need to
tread carefully and account for several different formatting
conventions. */
/* Apple: L<whatever><digit>: */
if ((colon_pos = strstr(line, ":"))) {
if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
/* Everybody else: .L<whatever>: */
if (strstr(line, ":")) {
if (line[0] == '.') {
/* .L0: or LBB0_0: style jump destination */
/* Apple: L<num> / LBB<num> */
if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
&& R(100) < inst_ratio) {
/* Apple: .L<num> / .LBB<num> */
if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
&& R(100) < inst_ratio) {
/* An optimization is possible here by adding the code only if the
label is mentioned in the code in contexts other than call / jmp.
That said, this complicates the code by requiring two-pass
processing (messy with stdin), and results in a speed gain
typically under 10%, because compilers are generally pretty good
about not generating spurious intra-function jumps.
We use deferred output chiefly to avoid disrupting
.Lfunc_begin0-style exception handling calculations (a problem on
MacOS X). */
if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
}
} else {
/* Function label (always instrumented, deferred mode). */
instrument_next = 1;
}
}
}
if (ins_lines)
fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
if (input_file) fclose(inf);
fclose(outf);
if (!be_quiet) {
if (!ins_lines) WARNF("No instrumentation targets found%s.",
pass_thru ? " (pass-thru mode)" : "");
else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
ins_lines, use_64bit ? "64" : "32",
getenv("AFL_HARDEN") ? "hardened" :
(sanitizer ? "ASAN/MSAN" : "non-hardened"),
inst_ratio);
}
}首先判断了
input_files
是否存在,如果存在,则打开它,并进行下一步,打开modified_file
,并从输入文件中,读入内容到line
数组中。接下来,根据代码的注释可以知道,只需要对
.text
段进行插桩,所以接下来进行了一系列的判断:instr_ok
用来判断是否为text
段,如果是则为1,否则为0。skip_csect
用来判断是32位还是64位,如果为64位则为1,否则为0。skip_intel
用来判断是否为intel
架构,如果是则为1,否则为0。skip_app
用来判断ad-hoc __asm__
块(咱就是说不懂这是啥),如果存在则为1,否则为0。
接下来,程序会判断各个标志位的值,并判断第一行是否为
#
或^\tjnz foo - conditional branches
,满足该条件,且R(100)<inst_ratio
,则会进行插桩,这里的R(100)
是一个宏定义,用来对随机数取余,而inst_ratio
则是插桩的密度。然后根据前面设置的use_64bit
是否为1,来判断是写入trampoline_fmt_64
还是trampoline_fmt_32
。接下来会判断
lines
数组中是否有:
,并且第一个字符是否是.
,如果是,则代表需要对函数或者分支指令插桩。接下来判断如下:.L0:
则说明是GCC下的分支指令,进行插桩。.LBB0_0:
且clang_mode
为1,则说明是clang下的分支指令,进行插桩。- 两者都不是的话,则说明是一个函数,对其进行插桩。
然后根据架构,向
modified_file
中,写入main_payload_64
或者main_payload_32
,并关闭文件。到这里,
afl-as
也就基本讲完了,主函数中会调用这两个函数,对程序进行插桩。
AFL源码分析(一)
- 版权声明: 本博客所有文章除特别声明外,著作权归作者所有。转载请注明出处!