bcc作为bpf的API库,提供了编译、加载、运行bpf代码的功能,其核心是对bpf系统调用进行封装,降低bpf的使用门槛。
[root@localhost bcc]# ls
build CMakeLists.txt CONTRIBUTING-SCRIPTS.md Dockerfile.debian Dockerfile.ubuntu examples images introspection LICENSE.txt man README.md snap src tools
cmake CODEOWNERS debian Dockerfile.tests docs FAQ.txt INSTALL.md libbpf-tools LINKS.md QUICKSTART.md scripts SPECS tests
[root@localhost bcc]#
src目录提供了c++、python和lua api接口文件,tools目录提供了示例工具以及示例工具的使用说明,tools目录的工具使用python api编码,examples目录有c++、lua api工具示例,INSTALL.md文件提供了不同平台的安装说明,包括ubuntu、centos、fedora等,具体介绍可以通过README.md和docs目录下的文件查看。
class BPF(object):
# Program-type constants passed as `prog_type` when loading a function.
# From bpf_prog_type in uapi/linux/bpf.h
SOCKET_FILTER = 1
KPROBE = 2
SCHED_CLS = 3
SCHED_ACT = 4
TRACEPOINT = 5
XDP = 6
PERF_EVENT = 7
CGROUP_SKB = 8
CGROUP_SOCK = 9
LWT_IN = 10
LWT_OUT = 11
LWT_XMIT = 12
SOCK_OPS = 13
SK_SKB = 14
CGROUP_DEVICE = 15
SK_MSG = 16
RAW_TRACEPOINT = 17
CGROUP_SOCK_ADDR = 18
TRACING = 26
LSM = 29
............
............
# NOTE(review): `cflags` and `usdt_contexts` use mutable list defaults; the
# visible code only reads them, but confirm the elided body never mutates them.
def __init__(self, src_file=b"", hdr_file=b"", text=None, debug=0,
cflags=[], usdt_contexts=[], allow_rlimit=True, device=None):
"""Create a new BPF module with the given source code.
Note:
All fields are marked as optional, but either `src_file` or `text`
must be supplied, and not both.
Args:
src_file (Optional[str]): Path to a source file for the module
hdr_file (Optional[str]): Path to a helper header file for the `src_file`
text (Optional[str]): Contents of a source file for the module
debug (Optional[int]): Flags used for debug prints, can be |'d together
See "Debug flags" for explanation
cflags (Optional[list]): Extra compiler flags; presumably converted to
the C array handed to the compile step -- TODO confirm, the
conversion is not shown here
usdt_contexts (Optional[list]): USDT contexts; their generated
argument code is prepended to the program text and their uprobes
are attached once the module has been compiled
allow_rlimit (Optional[bool]): Forwarded unchanged to the native
module-creation call
device (Optional[str]): Forwarded unchanged to the native
module-creation call
"""
Python API 以BPF类为核心进行编码设计,BPF类的`__init__`方法可以通过源文件、obj文件、text文本方式加载bpf程序,提供了6种DEBUG模式:
# Bit flags for the `debug` argument of BPF; each flag occupies its own bit,
# so several can be combined with bitwise OR and tested with bitwise AND.
# Debug output compiled LLVM IR.
DEBUG_LLVM_IR = 0x1
# Debug output loaded BPF bytecode and register state on branches.
DEBUG_BPF = 0x2
# Debug output pre-processor result.
DEBUG_PREPROCESSOR = 0x4
# Debug output ASM instructions embedded with source.
DEBUG_SOURCE = 0x8
# Debug output register state on all instructions in addition to DEBUG_BPF.
DEBUG_BPF_REGISTER_STATE = 0x10
# Debug BTF.
DEBUG_BTF = 0x20
BPF程序类型(嵌入点)目前列出了20种,如SOCKET_FILTER、KPROBE等;其中LSM需要kernel 5.7及以上版本(并开启CONFIG_BPF_LSM)才能支持。
BPF类底层实现是BPF的系统调用,API接口层主要负责组织内核程序,包括预加载、编译、加载到内核过程。
# Excerpt of the BPF constructor: resolves the source, compiles it into a
# native module handle, then attaches USDT and auto-attach probes.
# NOTE(review): `self.debug` and `cflags_array` are assigned in the elided
# lines below and are not visible in this excerpt.
def __init__(self, src_file=b"", hdr_file=b"", text=None, debug=0,
cflags=[], usdt_contexts=[], allow_rlimit=True, device=None):
............
............
# Resolve the source and header paths through BPF._find_file.
if src_file:
src_file = BPF._find_file(src_file)
hdr_file = BPF._find_file(hdr_file)
# files that end in ".b" are treated as B files. Everything else is a (BPF-)C file
if src_file.endswith(b".b"):
self.module = lib.bpf_module_create_b(src_file, hdr_file, self.debug, device)
else:
if src_file:
# Read the BPF C source file into the text variable. This ensures,
# that files and inline text are treated equally.
with open(src_file, mode="rb") as file:
text = file.read()
# Build a C array holding one opaque context pointer per USDT context.
ctx_array = (ct.c_void_p * len(usdt_contexts))()
for i, usdt in enumerate(usdt_contexts):
ctx_array[i] = ct.c_void_p(usdt.get_context())
# Generate the USDT argument code; a None result is treated as failure.
usdt_text = lib.bcc_usdt_genargs(ctx_array, len(usdt_contexts))
if usdt_text is None:
raise Exception("can't generate USDT probe arguments; " +
"possible cause is missing pid when a " +
"probe in a shared object has multiple " +
"locations")
# The generated USDT code goes in front of the user's program text.
text = usdt_text + text
# Compile the combined text into a native module handle.
self.module = lib.bpf_module_create_c_from_string(text,
self.debug,
cflags_array, len(cflags_array),
allow_rlimit, device)
# A falsy handle means module creation/compilation failed.
if not self.module:
raise Exception("Failed to compile BPF module %s" % (src_file or "<text>"))
# Attach the uprobes of every USDT context to this module.
for usdt_context in usdt_contexts:
usdt_context.attach_uprobes(self)
# If any "kprobe__" or "tracepoint__" or "raw_tracepoint__"
# prefixed functions were defined,
# they will be loaded and attached here.
self._trace_autoload()
加载过程主要由lib.bpf_module_create_c_from_string函数执行,bpf_module_create_c_from_string是bcc中C++的库函数
void * bpf_module_create_c_from_string(const char *text, unsigned flags, const char *cflags[],
int ncflags, bool allow_rlimit, const char *dev_name) {
auto mod = new ebpf::BPFModule(flags, nullptr, true, "", allow_rlimit, dev_name);
if (mod->load_string(text, cflags, ncflags) != 0) {
delete mod;
return nullptr;
}
return mod;
}
最终调用到BPFModule类的load_string方法
// Load a BPF C program supplied as an in-memory text string.
//
// Returns -1 when the module already has sections, otherwise the first
// non-zero status reported by load_cfile(), annotate() or finalize(),
// or 0 when every stage succeeds.
int BPFModule::load_string(const string &text, const char *cflags[], int ncflags) {
  const bool already_initialized = !sections_.empty();
  if (already_initialized) {
    fprintf(stderr, "Program already initialized\n");
    return -1;
  }

  int status = load_cfile(text, true, cflags, ncflags);
  if (status != 0)
    return status;

  if (!rw_engine_enabled_) {
    // The light annotation pass has no status that is checked here.
    annotate_light();
  } else {
    status = annotate();
    if (status != 0)
      return status;
  }

  // finalize() yields 0 on success, which is also this function's
  // success value, so its status can be returned directly.
  return finalize();
}
load_cfile会使用clang+llvm编译bpf程序(bcc可以在初始化BPF类时自定义编译选项),最终调用到clang api完成编译过程,编译后的内容存储在ebpf::BPFModule对象,并通过指针返回python环境。
BPF最后通过自动加载方式加载ebpf::BPFModule对象到系统,完成bpf程序注入内核工作
def _trace_autoload(self):
    """Load and attach every module function that uses an auto-attach prefix.

    Walks all functions reported by the native module and dispatches on
    the name prefix:

    * ``kprobe__`` / ``kretprobe__``: loaded as ``BPF.KPROBE`` and attached
      to the event named by the rest of the function name (passed through
      ``fix_syscall_fnname``).
    * ``tracepoint__``: loaded as ``BPF.TRACEPOINT``; the rest of the name
      has ``__`` replaced by ``:`` to form the tracepoint name.
    * ``raw_tracepoint__``: loaded as ``BPF.RAW_TRACEPOINT``; the rest of
      the name is used as the tracepoint name unchanged.
    * ``kfunc__`` / ``kretfunc__`` / ``lsm__``: handed to the matching
      ``attach_*`` method with the full function name.

    Functions with any other name are left alone.
    """
    kprobe_prefix = b"kprobe__"
    kretprobe_prefix = b"kretprobe__"
    tracepoint_prefix = b"tracepoint__"
    raw_tracepoint_prefix = b"raw_tracepoint__"

    # No debug print of each function name here: this runs on every module
    # load and must not write to the caller's stdout.
    for i in range(lib.bpf_num_functions(self.module)):
        func_name = lib.bpf_function_name(self.module, i)
        if func_name.startswith(kprobe_prefix):
            fn = self.load_func(func_name, BPF.KPROBE)
            self.attach_kprobe(
                event=self.fix_syscall_fnname(func_name[len(kprobe_prefix):]),
                fn_name=fn.name)
        elif func_name.startswith(kretprobe_prefix):
            fn = self.load_func(func_name, BPF.KPROBE)
            self.attach_kretprobe(
                event=self.fix_syscall_fnname(func_name[len(kretprobe_prefix):]),
                fn_name=fn.name)
        elif func_name.startswith(tracepoint_prefix):
            fn = self.load_func(func_name, BPF.TRACEPOINT)
            # "category__event" in the function name maps to "category:event".
            tp = fn.name[len(tracepoint_prefix):].replace(b"__", b":")
            self.attach_tracepoint(tp=tp, fn_name=fn.name)
        elif func_name.startswith(raw_tracepoint_prefix):
            fn = self.load_func(func_name, BPF.RAW_TRACEPOINT)
            tp = fn.name[len(raw_tracepoint_prefix):]
            self.attach_raw_tracepoint(tp=tp, fn_name=fn.name)
        elif func_name.startswith(b"kfunc__"):
            self.attach_kfunc(fn_name=func_name)
        elif func_name.startswith(b"kretfunc__"):
            self.attach_kretfunc(fn_name=func_name)
        elif func_name.startswith(b"lsm__"):
            self.attach_lsm(fn_name=func_name)
BCC加载bpf程序时使用的是C++的bcc_func_load方法,最后通过sys_bpf(BPF_PROG_LOAD, attr, size)系统调用把bpf程序加载到内核:
def load_func(self, func_name, prog_type, device = None):
    """Load one function of the module into the kernel and cache the result.

    Args:
        func_name: Name of the function; normalised with ``_assert_is_bytes``.
        prog_type: Program type value handed to the native loader.
        device: Optional device name handed to the native loader.

    Returns:
        The ``BPF.Function`` wrapping the loaded program's file descriptor.
        A function that was loaded before is returned from ``self.funcs``
        without loading it again.

    Raises:
        Exception: If the module has no such function, if the load fails
            with ``EPERM``, or if the load fails for any other reason.
    """
    func_name = _assert_is_bytes(func_name)
    if func_name in self.funcs:
        return self.funcs[func_name]

    if not lib.bpf_function_start(self.module, func_name):
        raise Exception("Unknown program %s" % func_name)

    # Map the debug flags onto the loader's log level; the register-state
    # flag takes precedence over the plain DEBUG_BPF flag.
    if self.debug & DEBUG_BPF_REGISTER_STATE:
        verbosity = 2
    elif self.debug & DEBUG_BPF:
        verbosity = 1
    else:
        verbosity = 0

    insns = lib.bpf_function_start(self.module, func_name)
    insns_size = lib.bpf_function_size(self.module, func_name)
    license_str = lib.bpf_module_license(self.module)
    kern_version = lib.bpf_module_kern_version(self.module)
    fd = lib.bcc_func_load(self.module, prog_type, func_name,
                           insns, insns_size, license_str, kern_version,
                           verbosity, None, 0, device)

    if fd >= 0:
        loaded = BPF.Function(self, func_name, fd)
        self.funcs[func_name] = loaded
        return loaded

    # Failure path: register the no-op exit handler first, then report.
    atexit.register(self.donothing)
    err_code = ct.get_errno()
    if err_code == errno.EPERM:
        raise Exception("Need super-user privileges to run")
    raise Exception("Failed to load BPF program %s: %s" %
                    (func_name, os.strerror(err_code)))
int bcc_func_load(void *program, int prog_type, const char *name,
const struct bpf_insn *insns, int prog_len,
const char *license, unsigned kern_version,
int log_level, char *log_buf, unsigned log_buf_size,
const char *dev_name) {
auto mod = static_cast<ebpf::BPFModule *>(program);
if (!mod) return -1;
return mod->bcc_func_load(prog_type, name, insns, prog_len,
license, kern_version, log_level,
log_buf, log_buf_size, dev_name);
}
int BPFModule::bcc_func_load(int prog_type, const char *name,
const struct bpf_insn *insns, int prog_len,
const char *license, unsigned kern_version,
int log_level, char *log_buf, unsigned log_buf_size,
const char *dev_name) {
struct bpf_load_program_attr attr = {};
unsigned func_info_cnt, line_info_cnt, finfo_rec_size, linfo_rec_size;
void *func_info = NULL, *line_info = NULL;
int ret;
attr.prog_type = (enum bpf_prog_type)prog_type;
attr.name = name;
attr.insns = insns;
attr.license = license;
if (attr.prog_type != BPF_PROG_TYPE_TRACING &&
attr.prog_type != BPF_PROG_TYPE_EXT) {
attr.kern_version = kern_version;
}
attr.log_level = log_level;
if (dev_name)
attr.prog_ifindex = if_nametoindex(dev_name);
if (btf_) {
int btf_fd = btf_->get_fd();
char secname[256];
::snprintf(secname, sizeof(secname), ".bpf.fn.%s", name);
ret = btf_->get_btf_info(secname, &func_info, &func_info_cnt,
&finfo_rec_size, &line_info,
&line_info_cnt, &linfo_rec_size);
if (!ret) {
attr.prog_btf_fd = btf_fd;
attr.func_info = func_info;
attr.func_info_cnt = func_info_cnt;
attr.func_info_rec_size = finfo_rec_size;
attr.line_info = line_info;
attr.line_info_cnt = line_info_cnt;
attr.line_info_rec_size = linfo_rec_size;
}
}
ret = bcc_prog_load_xattr(&attr, prog_len, log_buf, log_buf_size, allow_rlimit_);
if (btf_) {
free(func_info);
free(line_info);
}
return ret;
}
/*
 * Issue the BPF_PROG_LOAD command through sys_bpf(), repeating the call for
 * as long as it fails with errno set to EAGAIN.
 *
 * Returns the first result that is either non-negative or a failure with
 * an errno other than EAGAIN.
 */
static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
{
	for (;;) {
		int fd = sys_bpf(BPF_PROG_LOAD, attr, size);

		if (fd >= 0 || errno != EAGAIN)
			return fd;
	}
}