klee中目前定义了一些异常退出机制,定义在 ExitOnErrorType 中,其中与bug相关的就包括(不限于)
BadVectorAccess
: vector
访问越界
Free
:free
非法内存(局部变量和全局变量)
Overflow
: 整数溢出检测
Ptr
: 指针错误,包括缓冲区溢出等等
ReadOnly
: 在只读内存上进行写操作(判断比较简单,这里就不说了)
这里会涉及一些llvm指令的知识,具体可以参考官方文档
在 Executor::executeInstruction中执行的指令为Call指令时会调用 Executor::executeCall,Executor::executeCall中当调用的函数为外部函数时会调用Executor::callExternalFunction,Executor::callExternalFunction 首先会检查有没有对应的回调函数来处理这个API调用,如果有,直接用回调函数处理。
回调函数的定义可以参考SpecialFunctionHandler.cpp
这里主要关注跟 free
有关的回调函数,在SpecialFunctionHandler.cpp 有
add("free", handleFree, false)
,false
表示没有返回值
add("_ZdaPv", handleDeleteArray, false)
add("_ZdlPv", handleDelete, false)
add("realloc", handleRealloc, true)
大概就是 free
函数调用klee对应的回调函数是 handleFree
,对于 delete
指令回调函数是 handleDelete
,这4个回调函数的共同点就是调用了Executor::executeFree,代码如下:
void Executor::executeFree(ExecutionState &state,
ref<Expr> address,
KInstruction *target) {
address = optimizer.optimizeExpr(address, true);
StatePair zeroPointer =
fork(state, Expr::createIsZero(address), true, BranchType::Free);
if (zeroPointer.first) { // 为空指针的情况
if (target)
bindLocal(target, *zeroPointer.first, Expr::createPointer(0));
}
if (zeroPointer.second) { // 不为空指针的情况
ExactResolutionList rl;
resolveExact(*zeroPointer.second, address, rl, "free");
for (Executor::ExactResolutionList::iterator it = rl.begin(),
ie = rl.end(); it != ie; ++it) {
const MemoryObject *mo = it->first.first;
if (mo->isLocal) {
terminateStateOnError(*it->second, "free of alloca",
StateTerminationType::Free,
getAddressInfo(*it->second, address));
} else if (mo->isGlobal) {
terminateStateOnError(*it->second, "free of global",
StateTerminationType::Free,
getAddressInfo(*it->second, address));
} else {
it->second->addressSpace.unbindObject(mo);
if (target)
bindLocal(target, *it->second, Expr::createPointer(0));
}
}
}
}
}
如果 free
的地址处于局部变量或者全局变量区则报错,因为 free
的操作只能作用在堆区。不过这里并没有进行use-after-free检查。
对于 free
和 realloc
,这里在检查之前还调用了Executor::resolveExact 函数,这个函数的作用是解析指针为指向它可能指向的的内存对象的起始地址,必要时分叉执行,并为指向无效位置(超出边界或对象中间的地址)的指针生成错误,这里目标就是检查有没有可能 free
无效目标。
klee的整数溢出检测实际上是建立在UBSAN之上的,本身并没有Overflow检测,以下面这个代码作为示例:
test.c
#include <klee/klee.h>
int main() {
int a;
klee_make_symbolic(&a, sizeof(a), "a");
int b = a + 1;
return 0;
}
用 clang -I $KLEE_DIR/include -emit-llvm -S -g -O0 -Xclang -disable-O0-optnone -fsanitize=signed-integer-overflow test.c
,得到的llvm IR如下:
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 !dbg !7 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
store i32 0, i32* %retval, align 4
call void @llvm.dbg.declare(metadata i32* %a, metadata !11, metadata !DIExpression()), !dbg !12
%0 = bitcast i32* %a to i8*, !dbg !13
call void @klee_make_symbolic(i8* %0, i64 4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)), !dbg !14
call void @llvm.dbg.declare(metadata i32* %b, metadata !15, metadata !DIExpression()), !dbg !16
%1 = load i32, i32* %a, align 4, !dbg !17
%add = add nsw i32 %1, 1, !dbg !18
store i32 %add, i32* %b, align 4, !dbg !16
ret i32 0, !dbg !19
}
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare dso_local void @klee_make_symbolic(i8*, i64, i8*) #2
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "/home/prophe/projects/c/klee_samples/ubsan")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 11.0.0"}
!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !DILocalVariable(name: "a", scope: !7, file: !1, line: 4, type: !10)
!12 = !DILocation(line: 4, column: 7, scope: !7)
!13 = !DILocation(line: 5, column: 22, scope: !7)
!14 = !DILocation(line: 5, column: 3, scope: !7)
!15 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 6, type: !10)
!16 = !DILocation(line: 6, column: 7, scope: !7)
!17 = !DILocation(line: 6, column: 11, scope: !7)
!18 = !DILocation(line: 6, column: 13, scope: !7)
!19 = !DILocation(line: 7, column: 3, scope: !7)
再用 clang -I $KLEE_DIR/include -emit-llvm -S -g -O0 -Xclang -disable-O0-optnone -fsanitize=signed-integer-overflow test.c
,再次查看llvm IR
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
@.src = private unnamed_addr constant [7 x i8] c"test.c\00", align 1
@0 = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" }
@1 = private unnamed_addr global { { [7 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* } { { [7 x i8]*, i32, i32 } { [7 x i8]* @.src, i32 6, i32 13 }, { i16, i16, [6 x i8] }* @0 }
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 !dbg !7 {
entry:
%retval = alloca i32, align 4
%a = alloca i32, align 4
%b = alloca i32, align 4
store i32 0, i32* %retval, align 4
call void @llvm.dbg.declare(metadata i32* %a, metadata !11, metadata !DIExpression()), !dbg !12
%0 = bitcast i32* %a to i8*, !dbg !13
call void @klee_make_symbolic(i8* %0, i64 4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)), !dbg !14
call void @llvm.dbg.declare(metadata i32* %b, metadata !15, metadata !DIExpression()), !dbg !16
%1 = load i32, i32* %a, align 4, !dbg !17
%2 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 1), !dbg !18, !nosanitize !2
%3 = extractvalue { i32, i1 } %2, 0, !dbg !18, !nosanitize !2
%4 = extractvalue { i32, i1 } %2, 1, !dbg !18, !nosanitize !2
%5 = xor i1 %4, true, !dbg !18, !nosanitize !2
br i1 %5, label %cont, label %handler.add_overflow, !dbg !18, !prof !19, !nosanitize !2
handler.add_overflow: ; preds = %entry
%6 = zext i32 %1 to i64, !dbg !18, !nosanitize !2
call void @__ubsan_handle_add_overflow(i8* bitcast ({ { [7 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* }* @1 to i8*), i64 %6, i64 1) #4, !dbg !18, !nosanitize !2
br label %cont, !dbg !18, !nosanitize !2
cont: ; preds = %handler.add_overflow, %entry
store i32 %3, i32* %b, align 4, !dbg !16
ret i32 0, !dbg !20
}
; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare dso_local void @klee_make_symbolic(i8*, i64, i8*) #2
; Function Attrs: nounwind readnone speculatable willreturn
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
; Function Attrs: uwtable
declare dso_local void @__ubsan_handle_add_overflow(i8*, i64, i64) #3
attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { uwtable }
attributes #4 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "/home/prophe/projects/c/klee_samples/ubsan")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 11.0.0"}
!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !DILocalVariable(name: "a", scope: !7, file: !1, line: 4, type: !10)
!12 = !DILocation(line: 4, column: 7, scope: !7)
!13 = !DILocation(line: 5, column: 22, scope: !7)
!14 = !DILocation(line: 5, column: 3, scope: !7)
!15 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 6, type: !10)
!16 = !DILocation(line: 6, column: 7, scope: !7)
!17 = !DILocation(line: 6, column: 11, scope: !7)
!18 = !DILocation(line: 6, column: 13, scope: !7)
!19 = !{!"branch_weights", i32 1048575, i32 1}
!20 = !DILocation(line: 7, column: 3, scope: !7)
可以看到,%add = add nsw i32 %1, 1, !dbg !18
指令变成了 %2 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 1), !dbg !18, !nosanitize !2
,这里加数为变量 %1
和常量 1
,推测返回值 %2
应该是1个由 int(i32)
和 bool(i1)
组成的结构体,同时下方有
异或指令 %5 = xor i1 %4, true, !dbg !18, !nosanitize !2
和分支指令 br i1 %5, label %cont, label %handler.add_overflow, !dbg !18, !prof !19, !nosanitize !2
,%4
为改版加法指令返回的 bool
值,为1会跳转到 %handler.add_overflow
,推测加法返回 bool
为1表示触发整数溢出。
%handler.add_overflow
中存在函数调用 call void @__ubsan_handle_add_overflow(i8* bitcast ({ { [7 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* }* @1 to i8*), i64 %6, i64 1) #4, !dbg !18, !nosanitize !2
。大概是整数溢出处理函数。
用klee进行符号执行不加ubsan检查的只有1个test生成,值为0
对ubsan编译的代码运行klee的时候会有如下信息报出,并且生成了2个test,第1个值为0,第2个值为2147483647。
KLEE: ERROR: test.c:6: overflow on addition
在klee的SepecialFunctionHandler.cpp有:
add("__ubsan_handle_add_overflow", handleAddOverflow, false),
add("__ubsan_handle_sub_overflow", handleSubOverflow, false),
add("__ubsan_handle_mul_overflow", handleMulOverflow, false),
add("__ubsan_handle_divrem_overflow", handleDivRemOverflow, false)
SpecialFunctionHandler::handleAddOverflow的代码如下:
void SpecialFunctionHandler::handleAddOverflow(
ExecutionState &state, KInstruction *target,
std::vector<ref<Expr>> &arguments) {
executor.terminateStateOnError(state, "overflow on addition",
StateTerminationType::Overflow);
}
直接调用 terminateStateOnError
结束,因此klee检测整数溢出的本质上是clang编译器在做overflow检测,如果存在overflow那么编译器会生成分支,一个分支会跳转到overflow异常处理,klee的目标就是能覆盖到异常处理代码。
extractelement
指令从 vector
从取出指定索引位置的标量。示例 <result> = extractelement <4 x i32> %vec, i32 0 ; yields i32
,提取 vec
变量索引0的整数。
insertelement
指令将标量插入vector的指定位置。示例 <result> = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32>
,将整数1插入到 vec
索引为0的地方。
当指令类型为InsertElement 和 ExtractElement 时,klee会判断访问是否越界
if (cIdx == NULL) {
terminateStateOnExecError(
state, "InsertElement, support for symbolic index not implemented");
return;
}
uint64_t iIdx = cIdx->getZExtValue();
#if LLVM_VERSION_MAJOR >= 11
const auto *vt = cast<llvm::FixedVectorType>(iei->getType());
#else
const llvm::VectorType *vt = iei->getType();
#endif
unsigned EltBits = getWidthForLLVMType(vt->getElementType());
if (iIdx >= vt->getNumElements()) {
// Out of bounds write
terminateStateOnError(state, "Out of bounds write when inserting element", StateTerminationType::BadVectorAccess);
return;
}
不过目前似乎并不支持符号索引,只能在定值下起作用。
对应 stateTerminationType::Ptr
,有以下三种情况:
check_memory_access: memory error
: 这种情况需要被测代码调用 klee_check_memory_access
函数,暂时不讨论
memory error: invalid pointer
:在 resolveExact
中体现,主要检查有没有可能 free
无效目标,调用 free
和 realloc
的时候可能出现。
memory error: out of bound pointer
:执行 load
, store
指令,调用 va_arg
函数的时候可能出现。这里主要分析 load
和 store
中的情况。
这2个指令示例如下:
store i32 3, i32* %ptr ; # *ptr = 3
%val = load i32, i32* %ptr ; # val = ptr*
case Instruction::Load: {
ref<Expr> base = eval(ki, 0, state).value; // 指向地址
executeMemoryOperation(state, false, base, 0, ki);
break;
}
case Instruction::Store: {
ref<Expr> base = eval(ki, 1, state).value; // 指向地址
ref<Expr> value = eval(ki, 0, state).value; // 存储的值
executeMemoryOperation(state, true, base, value, 0);
break;
}
Executor::executeMemoryOperation部分代码如下(只关注检测是否越界部分)
// 求解address上所有可能存在的memory object,比如 *(p+i)指针和*(q+j)可能都指向address,不同的指针可能会存在重复区域
bool incomplete = state.addressSpace.resolve(state, solver, address, rl, 0, coreSolverTimeout);
solver->setTimeout(time::Span());
// XXX there is some query wasteage here. who cares?
ExecutionState *unbound = &state;
// 遍历所有的memory object
for (ResolutionList::iterator i = rl.begin(), ie = rl.end(); i != ie; ++i) {
const MemoryObject *mo = i->first;
const ObjectState *os = i->second;
ref<Expr> inBounds = mo->getBoundsCheckPointer(address, bytes);
StatePair branches = fork(*unbound, inBounds, true, BranchType::MemOp);
ExecutionState *bound = branches.first;
// bound can be 0 on failure or overlapped
if (bound) {
if (isWrite) {
if (os->readOnly) {
terminateStateOnError(*bound, "memory error: object read only",
StateTerminationType::ReadOnly);
} else {
ObjectState *wos = bound->addressSpace.getWriteable(mo, os);
wos->write(mo->getOffsetExpr(address), value);
}
} else {
ref<Expr> result = os->read(mo->getOffsetExpr(address), type);
bindLocal(target, *bound, result);
}
}
unbound = branches.second;
// 如果这个memory object不会越界退出,也就是只要这个address只要找到1个memory object不可能越界那就不算越界读写
if (!unbound)
break;
}
// 所有的memory object都可能发生越界读写,那么报错
// XXX should we distinguish out of bounds and overlapped cases?
if (unbound) {
if (incomplete) {
terminateStateOnSolverError(*unbound, "Query timed out (resolve).");
}
else {
terminateStateOnError(*unbound, "memory error: out of bound pointer", StateTerminationType::Ptr, getAddressInfo(*unbound, address));
}
}
首先1个地址可能会对应多个memory object,*(p + i) == *(q + j)
,像 int *p = a; p++;
这种语句就存在1个address会对应 p, a
2个memmory object的情况。对于1个address的读写,如果能找到1个memory object,满足对于该object这个地址一定合法,那么这就不算越界读写,这里 .addressSpace.resolve
过程以后再研究。
这里涉及到的缓冲区只包含了指针访问这种,还没涉及到 memcpy
内存拷贝这种溢出,这部分在klee主程序中并没有找到对应代码,但是klee却可以查找这类bug,推测是在klee-uclibc中实现了。