protected:
// Restricts the calling thread to the single CPU identified by `cpuindex`.
//
// A one-entry affinity mask is built and passed to sched_setaffinity() with
// pid 0, which targets the caller itself. The syscall result is returned
// unchanged: 0 on success, -1 on failure (errno is set by the syscall).
long SetCPU(uint32_t cpuindex) {
  cpu_set_t affinity;
  CPU_ZERO(&affinity);
  CPU_SET(cpuindex, &affinity);
  const int rc = sched_setaffinity(/*pid=*/0, sizeof(cpu_set_t), &affinity);
  return rc;
}
void operator()(benchmark::State& state) {
if (state.range(0) == 0xffffffff) {
SetCPU(state.thread_index());
} else {
SetCPU(state.range(0));
}
range(0) 即为要绑定的 CPU 编号;当其为 0xffffffff 时,改为按 state.thread_index() 绑定。
#读取int型一列,做baseline
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 0.734 s 0.736 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.34947G num_rows=16M parquet_parse=4.78942G rowgroups=6 write_time=840.358M
#调整步长为8(value_ptr 应为 int64_t 指针,i*8 即每次跨 8 个元素 = 64 字节)
- auto value = int64_array->Value(i);
- memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+ // auto value = int64_array->Value(i);
+ // memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+ auto value_ptr = int64_array->raw_values();
+ memcpy(buffer_address + offsets[i] + field_offset, value_ptr + i*8, 8);
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 0.816 s 0.818 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.37134G num_rows=16M parquet_parse=4.75935G rowgroups=6 write_time=1.63872G
+ int64_t val_offset = i*8;
+ memcpy(buffer_address + offsets[i] + field_offset, value_ptr + val_offset, 8);
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 0.812 s 0.814 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.32578G num_rows=16M parquet_parse=4.74461G rowgroups=6 write_time=1.6437G
#调整步长为2
+ int64_t val_offset = i*2;
+ memcpy(buffer_address + offsets[i] + field_offset, value_ptr + val_offset, 8);
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 0.765 s 0.767 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.51011G num_rows=16M parquet_parse=4.81547G rowgroups=6 write_time=990.263M
- auto value = int64_array->Value(i);
- memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+ // auto value = int64_array->Value(i);
+ // memcpy(buffer_address + offsets[i] + field_offset, &value, sizeof(int64_t));
+ auto value_ptr = int64_array->raw_values();
+ memcpy(buffer_address + offsets[i] + field_offset, &(value_ptr[i]), 8);
------------
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 0.731 s 0.733 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.32732G num_rows=16M parquet_parse=4.73706G rowgroups=6 write_time=839.743M
+ auto value_ptr = int64_array->raw_values();
+ memcpy(buffer_address + offsets[i] + field_offset, value_ptr + i, 8);
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 0.732 s 0.734 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.33026G num_rows=16M parquet_parse=4.74852G rowgroups=6 write_time=839.259M
# baseline
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 2.79 s 2.79 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.70119G num_rows=16M parquet_parse=4.78144G rowgroups=6 write_time=21.0347G
#先行后列
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
----------------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
----------------------------------------------------------------------------------------------------------------------------------------------
GoogleBenchmarkColumnarToRow/CacheScan/1536/10/iterations:10/process_time/threads:1 18.9 s 18.9 s 10 batch_buffer_size=32k batches=489 columns=16 init_time=6.84436G num_rows=16M parquet_parse=4.7926G rowgroups=6 write_time=182.113G