程序分析-klee查找bug过程

慎峻

2023-12-01

klee中目前定义了一些异常退出机制，定义在 ExitOnErrorType 中，其中与bug相关的就包括（不限于）

BadVectorAccess: vector 访问越界
Free：free 非法内存（局部变量和全局变量）
Overflow: 整数溢出检测
Ptr: 指针错误，包括缓冲区溢出等等
ReadOnly: 在只读内存上进行写操作（判断比较简单，这里就不说了）

这里会涉及一些llvm指令的知识，具体可以参考官方文档

1.系统API调用处理

在 Executor::executeInstruction中执行的指令为Call指令时会调用 Executor::executeCall，Executor::executeCall中当调用的函数为外部函数时会调用Executor::callExternalFunction，Executor::callExternalFunction 首先会检查有没有对应的回调函数来处理这个API调用，如果有，直接用回调函数处理。

回调函数的定义可以参考SpecialFunctionHandler.cpp

1.1.Free

这里主要关注跟 free 有关的回调函数，在SpecialFunctionHandler.cpp 有

add("free", handleFree, false)，false 表示没有返回值
add("_ZdaPv", handleDeleteArray, false)
add("_ZdlPv", handleDelete, false)
add("realloc", handleRealloc, true)

大概就是 free函数调用klee对应的回调函数是 handleFree，对于 delete 指令回调函数是 handleDelete，这4个回调函数的共同点就是调用了Executor::executeFree，代码如下：

void Executor::executeFree(ExecutionState &state,
                           ref<Expr> address,
                           KInstruction *target) {
	address = optimizer.optimizeExpr(address, true);
    StatePair zeroPointer =
      fork(state, Expr::createIsZero(address), true, BranchType::Free);
    if (zeroPointer.first) { // 为空指针的情况
      if (target)
        bindLocal(target, *zeroPointer.first, Expr::createPointer(0));
    }

    if (zeroPointer.second) { // 不为空指针的情况
		ExactResolutionList rl;
	    resolveExact(*zeroPointer.second, address, rl, "free");
	    
	    for (Executor::ExactResolutionList::iterator it = rl.begin(), 
	           ie = rl.end(); it != ie; ++it) {
	      const MemoryObject *mo = it->first.first;
	      if (mo->isLocal) {
	        terminateStateOnError(*it->second, "free of alloca",
	                              StateTerminationType::Free,
	                              getAddressInfo(*it->second, address));
	      } else if (mo->isGlobal) {
	        terminateStateOnError(*it->second, "free of global",
	                              StateTerminationType::Free,
	                              getAddressInfo(*it->second, address));
	      } else {
	        it->second->addressSpace.unbindObject(mo);
	        if (target)
	          bindLocal(target, *it->second, Expr::createPointer(0));
	      }
	    }
	}
  }
}

如果 free 的地址处于局部变量或者全局变量区则报错，因为 free 的操作只能作用在堆区。不过这里并没有进行use-after-free检查。

对于 free 和 realloc，这里在检查之前还调用了Executor::resolveExact 函数，这个函数的作用是解析指针为指向它可能指向的的内存对象的起始地址，必要时分叉执行，并为指向无效位置（超出边界或对象中间的地址）的指针生成错误，这里目标就是检查有没有可能 free 无效目标。

1.2.Overflow

klee的整数溢出检测实际上是建立在UBSAN之上的，本身并没有Overflow检测，以下面这个代码作为示例：

test.c

#include <klee/klee.h>

int main() {
  int a;
  klee_make_symbolic(&a, sizeof(a), "a");
  int b = a + 1;
  return 0;
}

用 clang -I $KLEE_DIR/include -emit-llvm -S -g -O0 -Xclang -disable-O0-optnone -fsanitize=signed-integer-overflow test.c，得到的llvm IR如下：

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 !dbg !7 {
entry:
  %retval = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  call void @llvm.dbg.declare(metadata i32* %a, metadata !11, metadata !DIExpression()), !dbg !12
  %0 = bitcast i32* %a to i8*, !dbg !13
  call void @klee_make_symbolic(i8* %0, i64 4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)), !dbg !14
  call void @llvm.dbg.declare(metadata i32* %b, metadata !15, metadata !DIExpression()), !dbg !16
  %1 = load i32, i32* %a, align 4, !dbg !17
  %add = add nsw i32 %1, 1, !dbg !18
  store i32 %add, i32* %b, align 4, !dbg !16
  ret i32 0, !dbg !19
}

; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

declare dso_local void @klee_make_symbolic(i8*, i64, i8*) #2

attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "/home/prophe/projects/c/klee_samples/ubsan")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 11.0.0"}
!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !DILocalVariable(name: "a", scope: !7, file: !1, line: 4, type: !10)
!12 = !DILocation(line: 4, column: 7, scope: !7)
!13 = !DILocation(line: 5, column: 22, scope: !7)
!14 = !DILocation(line: 5, column: 3, scope: !7)
!15 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 6, type: !10)
!16 = !DILocation(line: 6, column: 7, scope: !7)
!17 = !DILocation(line: 6, column: 11, scope: !7)
!18 = !DILocation(line: 6, column: 13, scope: !7)
!19 = !DILocation(line: 7, column: 3, scope: !7)

再用 clang -I $KLEE_DIR/include -emit-llvm -S -g -O0 -Xclang -disable-O0-optnone -fsanitize=signed-integer-overflow test.c，再次查看llvm IR

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
@.src = private unnamed_addr constant [7 x i8] c"test.c\00", align 1
@0 = private unnamed_addr constant { i16, i16, [6 x i8] } { i16 0, i16 11, [6 x i8] c"'int'\00" }
@1 = private unnamed_addr global { { [7 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* } { { [7 x i8]*, i32, i32 } { [7 x i8]* @.src, i32 6, i32 13 }, { i16, i16, [6 x i8] }* @0 }

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 !dbg !7 {
entry:
  %retval = alloca i32, align 4
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  store i32 0, i32* %retval, align 4
  call void @llvm.dbg.declare(metadata i32* %a, metadata !11, metadata !DIExpression()), !dbg !12
  %0 = bitcast i32* %a to i8*, !dbg !13
  call void @klee_make_symbolic(i8* %0, i64 4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)), !dbg !14
  call void @llvm.dbg.declare(metadata i32* %b, metadata !15, metadata !DIExpression()), !dbg !16
  %1 = load i32, i32* %a, align 4, !dbg !17
  %2 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 1), !dbg !18, !nosanitize !2
  %3 = extractvalue { i32, i1 } %2, 0, !dbg !18, !nosanitize !2
  %4 = extractvalue { i32, i1 } %2, 1, !dbg !18, !nosanitize !2
  %5 = xor i1 %4, true, !dbg !18, !nosanitize !2
  br i1 %5, label %cont, label %handler.add_overflow, !dbg !18, !prof !19, !nosanitize !2

handler.add_overflow:                             ; preds = %entry
  %6 = zext i32 %1 to i64, !dbg !18, !nosanitize !2
  call void @__ubsan_handle_add_overflow(i8* bitcast ({ { [7 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* }* @1 to i8*), i64 %6, i64 1) #4, !dbg !18, !nosanitize !2
  br label %cont, !dbg !18, !nosanitize !2

cont:                                             ; preds = %handler.add_overflow, %entry
  store i32 %3, i32* %b, align 4, !dbg !16
  ret i32 0, !dbg !20
}

; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

declare dso_local void @klee_make_symbolic(i8*, i64, i8*) #2

; Function Attrs: nounwind readnone speculatable willreturn
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1

; Function Attrs: uwtable
declare dso_local void @__ubsan_handle_add_overflow(i8*, i64, i64) #3

attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable willreturn }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { uwtable }
attributes #4 = { nounwind }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "/home/prophe/projects/c/klee_samples/ubsan")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 11.0.0"}
!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !DILocalVariable(name: "a", scope: !7, file: !1, line: 4, type: !10)
!12 = !DILocation(line: 4, column: 7, scope: !7)
!13 = !DILocation(line: 5, column: 22, scope: !7)
!14 = !DILocation(line: 5, column: 3, scope: !7)
!15 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 6, type: !10)
!16 = !DILocation(line: 6, column: 7, scope: !7)
!17 = !DILocation(line: 6, column: 11, scope: !7)
!18 = !DILocation(line: 6, column: 13, scope: !7)
!19 = !{!"branch_weights", i32 1048575, i32 1}
!20 = !DILocation(line: 7, column: 3, scope: !7)

可以看到，%add = add nsw i32 %1, 1, !dbg !18 指令变成了 %2 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 1), !dbg !18, !nosanitize !2，这里加数为变量 %1 和常量 1，推测返回值 %2 应该是1个由 int(i32) 和 bool(i1) 组成的结构体，同时下方有

异或指令 %5 = xor i1 %4, true, !dbg !18, !nosanitize !2 和分支指令 br i1 %5, label %cont, label %handler.add_overflow, !dbg !18, !prof !19, !nosanitize !2，%4 为改版加法指令返回的 bool 值，为1会跳转到 %handler.add_overflow，推测加法返回 bool 为1表示触发整数溢出。
%handler.add_overflow 中存在函数调用 call void @__ubsan_handle_add_overflow(i8* bitcast ({ { [7 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* }* @1 to i8*), i64 %6, i64 1) #4, !dbg !18, !nosanitize !2。大概是整数溢出处理函数。

用klee进行符号执行不加ubsan检查的只有1个test生成，值为0

对ubsan编译的代码运行klee的时候会有如下信息报出，并且生成了2个test，第1个值为0，第2个值为2147483647。

KLEE: ERROR: test.c:6: overflow on addition

在klee的SepecialFunctionHandler.cpp有：

add("__ubsan_handle_add_overflow", handleAddOverflow, false),
add("__ubsan_handle_sub_overflow", handleSubOverflow, false),
add("__ubsan_handle_mul_overflow", handleMulOverflow, false),
add("__ubsan_handle_divrem_overflow", handleDivRemOverflow, false)

SpecialFunctionHandler::handleAddOverflow的代码如下：

void SpecialFunctionHandler::handleAddOverflow(
    ExecutionState &state, KInstruction *target,
    std::vector<ref<Expr>> &arguments) {
  executor.terminateStateOnError(state, "overflow on addition",
                                 StateTerminationType::Overflow);
}

直接调用 terminateStateOnError 结束，因此klee检测整数溢出的本质上是clang编译器在做overflow检测，如果存在overflow那么编译器会生成分支，一个分支会跳转到overflow异常处理，klee的目标就是能覆盖到异常处理代码。

2.vector越界检查

extractelement 指令从 vector 从取出指定索引位置的标量。示例 <result> = extractelement <4 x i32> %vec, i32 0 ; yields i32，提取 vec 变量索引0的整数。
insertelement 指令将标量插入vector的指定位置。示例 <result> = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32>，将整数1插入到 vec 索引为0的地方。

当指令类型为InsertElement 和 ExtractElement 时，klee会判断访问是否越界

    if (cIdx == NULL) {
      terminateStateOnExecError(
          state, "InsertElement, support for symbolic index not implemented");
      return;
    }
    uint64_t iIdx = cIdx->getZExtValue();
#if LLVM_VERSION_MAJOR >= 11
    const auto *vt = cast<llvm::FixedVectorType>(iei->getType());
#else
    const llvm::VectorType *vt = iei->getType();
#endif
    unsigned EltBits = getWidthForLLVMType(vt->getElementType());

    if (iIdx >= vt->getNumElements()) {
      // Out of bounds write
      terminateStateOnError(state, "Out of bounds write when inserting element", StateTerminationType::BadVectorAccess);
      return;
    }

不过目前似乎并不支持符号索引，只能在定值下起作用。

3.指针异常

对应 stateTerminationType::Ptr，有以下三种情况：

check_memory_access: memory error: 这种情况需要被测代码调用 klee_check_memory_access 函数，暂时不讨论
memory error: invalid pointer：在 resolveExact 中体现，主要检查有没有可能 free 无效目标，调用 free 和 realloc 的时候可能出现。
memory error: out of bound pointer：执行 load, store 指令，调用 va_arg 函数的时候可能出现。这里主要分析 load 和 store 中的情况。

这2个指令示例如下：

store i32 3, i32* %ptr ;  # *ptr = 3
%val = load i32, i32* %ptr ;  # val = ptr*

case Instruction::Load: {
    ref<Expr> base = eval(ki, 0, state).value; // 指向地址
    executeMemoryOperation(state, false, base, 0, ki);
    break;
  }
case Instruction::Store: {
    ref<Expr> base = eval(ki, 1, state).value; // 指向地址
    ref<Expr> value = eval(ki, 0, state).value; // 存储的值
    executeMemoryOperation(state, true, base, value, 0);
    break;
  }

Executor::executeMemoryOperation部分代码如下（只关注检测是否越界部分）

// 求解address上所有可能存在的memory object，比如 *(p+i)指针和*(q+j)可能都指向address，不同的指针可能会存在重复区域
bool incomplete = state.addressSpace.resolve(state, solver, address, rl, 0,  coreSolverTimeout);
solver->setTimeout(time::Span());
  
// XXX there is some query wasteage here. who cares?
ExecutionState *unbound = &state;

// 遍历所有的memory object
for (ResolutionList::iterator i = rl.begin(), ie = rl.end(); i != ie; ++i) {
    const MemoryObject *mo = i->first;
    const ObjectState *os = i->second;
    ref<Expr> inBounds = mo->getBoundsCheckPointer(address, bytes);
    
    StatePair branches = fork(*unbound, inBounds, true, BranchType::MemOp);
    ExecutionState *bound = branches.first;

    // bound can be 0 on failure or overlapped 
    if (bound) {
      if (isWrite) {
        if (os->readOnly) {
          terminateStateOnError(*bound, "memory error: object read only",
                                StateTerminationType::ReadOnly);
        } else {
          ObjectState *wos = bound->addressSpace.getWriteable(mo, os);
          wos->write(mo->getOffsetExpr(address), value);
        }
      } else {
        ref<Expr> result = os->read(mo->getOffsetExpr(address), type);
        bindLocal(target, *bound, result);
      }
    }

    unbound = branches.second;
    // 如果这个memory object不会越界退出，也就是只要这个address只要找到1个memory object不可能越界那就不算越界读写
    if (!unbound)
      break;
}

// 所有的memory object都可能发生越界读写，那么报错
// XXX should we distinguish out of bounds and overlapped cases?
if (unbound) {
    if (incomplete) {
      terminateStateOnSolverError(*unbound, "Query timed out (resolve).");
    } 
    else {
      terminateStateOnError(*unbound, "memory error: out of bound pointer", StateTerminationType::Ptr, getAddressInfo(*unbound, address));
    }
  }

首先1个地址可能会对应多个memory object，*(p + i) == *(q + j)，像 int *p = a; p++; 这种语句就存在1个address会对应 p, a 2个memmory object的情况。对于1个address的读写，如果能找到1个memory object，满足对于该object这个地址一定合法，那么这就不算越界读写，这里 .addressSpace.resolve 过程以后再研究。

4.其它bug

这里涉及到的缓冲区只包含了指针访问这种，还没涉及到 memcpy 内存拷贝这种溢出，这部分在klee主程序中并没有找到对应代码，但是klee却可以查找这类bug，推测是在klee-uclibc中实现了。

程序分析-klee查找bug过程

1.系统API调用处理

1.1.Free

1.2.Overflow

2.vector越界检查

3.指针异常

4.其它bug

相关阅读

相关文章

相关问答

相关文档