当前位置: 首页 > 工具软件 > Thrust > 使用案例 >

CUDA初学者-Thrust - Algorithms - Copying(1)

越飞翮
2023-12-01

本人 CUDA小白一枚,要是有什么不对,还望各位大佬指点。
Thrust是一种C++的并行算法库,thrust的接口大大提高了工作的效率,同时还实现了GPU和多核CPU之间的性能的可移植性。
本文及后面的几篇将分别从几个方面来大概阐述一下Thrust的一些接口。原来的网址在这里
1.Algorithms
2.Container Classes
3.Containers
4.Function Objects
5.Iterators
6.Memory Management
7.Numerics
8.Parallel Execution Policies
9.Random Number Generation
10. System
11. Utility

1.Algorithms

1.1 Copying
1.1.1 Gathering
1.1.1.1 gather

// thrust::gather -- overload that takes an explicit execution policy.
//   exec                  : execution policy object, passed as the first argument
//   [map_first, map_last) : the map sequence that gives the order in which source elements are taken
//   input_first           : beginning of the source data
//   result                : beginning of the destination
// As the accompanying text describes, the source data is stored to the
// destination in the order given by the map.
template<typename DerivedPolicy, typename InputIterator, typename RandomAccessIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::gather(
	const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator map_first, 
	InputIterator map_last, 
	RandomAccessIterator input_first, 
	OutputIterator result
);

// thrust::gather -- same operation and parameters as above, without the
// execution policy argument.
template<typename InputIterator, typename RandomAccessIterator, typename OutputIterator>
OutputIterator thrust::gather(
	InputIterator map_first, 
	InputIterator map_last, 
	RandomAccessIterator input_first, 
	OutputIterator result
);

将源数据按照一定的顺序(map)依次存放到目标地址
例子:

	// Element count shared by the source, the map and both outputs.
	const int kCount = 10;

	// Host-side staging arrays: the data to read from and the indices to read it at.
	int values[kCount] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
	int map[kCount]    = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};

	// Upload both arrays to the device.
	thrust::device_vector<int> d_values(values, values + kCount);
	thrust::device_vector<int> d_map(map, map + kCount);

	// One destination per overload so the two calls can be compared.
	thrust::device_vector<int> d_output1(kCount);
	thrust::device_vector<int> d_output2(kCount);

	// Overload with an explicit execution policy.
	thrust::gather(thrust::device,
	               d_map.begin(), d_map.end(),
	               d_values.begin(),
	               d_output1.begin());

	// Overload without an execution policy.
	thrust::gather(d_map.begin(), d_map.end(),
	               d_values.begin(),
	               d_output2.begin());

	// Final result in both outputs: {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}

1.1.1.2 gather_if

// thrust::gather_if -- conditional gather, overload with an execution policy
// and no predicate.
//   exec                  : execution policy object, passed as the first argument
//   [map_first, map_last) : the map sequence that gives the order in which source elements are taken
//   stencil               : beginning of the stencil sequence that decides whether each position is copied
//   input_first           : beginning of the source data
//   result                : beginning of the destination
// In the example that follows, positions whose stencil value is 1 are copied
// and positions whose stencil value is 0 keep their previous value.
template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename RandomAccessIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::gather_if(
	const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator1 map_first, 
	InputIterator1 map_last, 
	InputIterator2 stencil,
	RandomAccessIterator input_first, 
	OutputIterator result
);

// thrust::gather_if -- same as above, without the execution policy argument.
template<typename InputIterator1, typename InputIterator2, typename RandomAccessIterator, typename OutputIterator>
OutputIterator thrust::gather_if(
	InputIterator1 map_first, 
	InputIterator1 map_last, 
	InputIterator2 stencil,
	RandomAccessIterator input_first, 
	OutputIterator result
);

// thrust::gather_if -- overload with an execution policy and a predicate.
//   pred : predicate applied to the stencil values; a position is copied only
//          when its stencil value satisfies the predicate
template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename RandomAccessIterator, typename OutputIterator, typename Predicate>
__host__ __device__ OutputIterator thrust::gather_if(
	const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator1 map_first, 
	InputIterator1 map_last, 
	InputIterator2 stencil,
	RandomAccessIterator input_first, 
	OutputIterator result,
	Predicate pred
);

// thrust::gather_if -- predicate overload without the execution policy argument.
template<typename InputIterator1, typename InputIterator2, typename RandomAccessIterator, typename OutputIterator, typename Predicate>
OutputIterator thrust::gather_if(
	InputIterator1 map_first, 
	InputIterator1 map_last, 
	InputIterator2 stencil,
	RandomAccessIterator input_first, 
	OutputIterator result,
	Predicate pred
);

gather_if与gather不同的是,拷贝的过程是带有一定的条件的,只有stencil中对应位置的值符合条件,才会将源数据拷贝到目的地址。
例子:

// Source data to gather from.
// (Fixed: the array is named `values`, matching its use on the next line,
// and the declaration is terminated with a semicolon.)
int values[10]  = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
thrust::device_vector<int> d_values(values, values + 10);

// Stencil: in this example a position is gathered when its stencil value is 1
// and is left untouched when its stencil value is 0.
int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
thrust::device_vector<int> d_stencil(stencil, stencil + 10);

// Map: output position i reads from d_values[map[i]].
int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
thrust::device_vector<int> d_map(map, map + 10);

// Pre-fill the output with 7 so that positions that are not copied stay visible.
thrust::device_vector<int> d_output(10, 7);
thrust::gather_if(thrust::device, d_map.begin(), d_map.end(), d_stencil.begin(), d_values.begin(), d_output.begin());
// d_output: {7, 7, 7, 7, 7, 7, 7, 7, 7, 7} -> {0, 7, 4, 7, 8, 7, 3, 7, 7, 7}
// (the 7 at index 8 is a copied value: values[map[8]] == values[7] == 7)
thrust::device_vector<int> d_output1(10, 7);
thrust::gather_if(d_map.begin(), d_map.end(), d_stencil.begin(), d_values.begin(), d_output1.begin());
// Same result as d_output.

// Predicate applied to each stencil value: true for even numbers.
struct is_even {
	__host__ __device__
	bool operator()(const int x) const { return (x % 2 == 0); }
};

// With a predicate, a position is gathered when pred(stencil value) is true,
// i.e. when the stencil value is even.
int stencil2[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9};
thrust::device_vector<int> d_stencil2(stencil2, stencil2 + 10);
thrust::device_vector<int> d_output2(10, 7);
thrust::gather_if(thrust::device, d_map.begin(), d_map.end(), d_stencil2.begin(), d_values.begin(), d_output2.begin(), is_even());
// d_output2: {7, 7, 7, 7, 7, 7, 7, 7, 7, 7} -> {0, 7, 4, 7, 8, 7, 3, 7, 7, 7}

thrust::device_vector<int> d_output3(10, 7);
thrust::gather_if(d_map.begin(), d_map.end(), d_stencil2.begin(), d_values.begin(), d_output3.begin(), is_even());
// Same result as d_output2.

可以看出,增加了is_even()之后,是利用is_even()的规则对stencil中的值进行判断,如果符合条件,则拷贝对应位置的数据。例子中是根据stencil中各元素的奇偶性来决定是否拷贝:只有stencil值为偶数的位置才会被拷贝。

1.1.2 Scattering
1.1.2.1 scatter

template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename RandomAccessIterator>
__host__ __device__ OutputIterator thrust::scatter(
    const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator1 first, 
	InputIterator1 last, 
	InputIterator2 map,
	RandomAccessIterator result, 
);

template<typename InputIterator1, typename InputIterator2, typename RandomAccessIterator>
OutputIterator thrust::scatter(
	InputIterator1 first, 
	InputIterator1 last, 
	InputIterator2 map,
	RandomAccessIterator result, 
);

将源数据依次按照一定的顺序(map)存放到目标的对应位置。
例子:

// Source data to scatter.
// (Fixed: the array is named `values`, matching its use on the next line,
// and the declaration is terminated with a semicolon.)
int values[10]  = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
thrust::device_vector<int> d_values(values, values + 10);

// Map: source element i is written to output position map[i].
int map[10]   = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
thrust::device_vector<int> d_map(map, map + 10);

// Overload with an explicit execution policy.
thrust::device_vector<int> d_output(10);
thrust::scatter(thrust::device, d_values.begin(), d_values.end(), d_map.begin(), d_output.begin());
// d_output: {0, 5, 1, 6, 2, 7, 3, 8, 4, 9}

// Overload without an execution policy.
thrust::device_vector<int> d_output1(10);
thrust::scatter(d_values.begin(), d_values.end(), d_map.begin(), d_output1.begin());
// Same result as d_output.

1.1.2.2 scatter_if

template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3,  typename RandomAccessIterator>
__host__ __device__ OutputIterator thrust::scatter_if(
    const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator1 first, 
	InputIterator1 last, 
	InputIterator2 map,
	InputIterator3 stencil,
	RandomAccessIterator result
);

template<typename InputIterator1, typename InputIterator2, typename InputIterator3, typename RandomAccessIterator>
OutputIterator thrust::scatter_if(
	InputIterator1 first, 
	InputIterator1 last, 
	InputIterator2 map,
	InputIterator3 stencil,
	RandomAccessIterator result
);

template<typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3,  typename RandomAccessIterator, typename Predicate>
__host__ __device__ OutputIterator thrust::scatter_if(
    const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator1 first, 
	InputIterator1 last, 
	InputIterator2 map,
	InputIterator3 stencil,
	RandomAccessIterator result,
	Predicate pred 
);

template<typename InputIterator1, typename InputIterator2, typename InputIterator3, typename RandomAccessIterator, typename Predicate>
OutputIterator thrust::scatter_if(
	InputIterator1 first, 
	InputIterator1 last, 
	InputIterator2 map,
	InputIterator3 stencil,
	RandomAccessIterator result, 
	Predicate pred 
);

scatter_if中的stencil和pred与gather_if中的作用类似,都是用来决定对应元素是否需要拷贝。
例子:

// V: source values, M: destination index of each value,
// S: stencil deciding whether each value is copied, D: destination.
int V[8] = {10, 20, 30 ,40, 50, 60, 70, 80};
int M[8] = {0, 5, 1, 6, 2, 7, 3, 4};
int S[8] = {1, 0, 1, 0, 1, 0, 1, 0};
int D[8] = {0, 0, 0, 0, 0, 0, 0, 0};

// Without a predicate: in this example V[i] is written to D[M[i]] when S[i] is 1.
thrust::scatter_if(thrust::host, V, V + 8, M, S, D);
// thrust::scatter_if(V, V + 8, M, S, D);
// D : {0, 0, 0, 0, 0, 0, 0, 0} -> {10, 30, 50, 70, 0, 0, 0, 0}

// Predicate applied to each stencil value: true for even numbers.
// (Fixed: the struct definition is now terminated with a semicolon.)
struct is_even {
	__host__ __device__ bool operator()(int x) {
		return (x % 2 == 0);
	}
};

// With a predicate: V[i] is written to D1[M[i]] when pred(S[i]) is true,
// i.e. when S[i] is even (here: when S[i] is 0).
int D1[8] = {0, 0, 0, 0, 0, 0, 0, 0};
is_even pred;
thrust::scatter_if(thrust::host, V, V + 8, M, S, D1, pred);
// thrust::scatter_if(V, V + 8, M, S, D1, pred);
// D1: {0, 0, 0, 0, 0, 0, 0, 0} -> {0, 0, 0, 0, 80, 20, 40 ,60}

pred还是针对stencil中的值进行判断,如果符合条件才会对数据进行拷贝。

1.1.3 Copy
1.1.3.1 copy

// thrust::copy -- bulk copy, overload that takes an explicit execution policy.
//   exec          : execution policy object, passed as the first argument
//   [first, last) : the source sequence
//   result        : beginning of the destination
template<typename DerivedPolicy, typename InputIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::copy(
	const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	InputIterator first,
	InputIterator last,
	OutputIterator result
);

// thrust::copy -- same operation and parameters, without the execution
// policy argument.
template<typename InputIterator, typename OutputIterator>
OutputIterator thrust::copy(
	InputIterator first,
	InputIterator last,
	OutputIterator result
);

批量数据拷贝
例子:

// Source and destination vectors, 100 elements each.
thrust::device_vector<int> vec0(100);
thrust::device_vector<int> vec1(100);

// Copy the whole of vec0 into vec1 using the execution-policy overload.
thrust::copy(thrust::device,
             vec0.begin(), vec0.end(),
             vec1.begin());
// Equivalent call without an execution policy:
// thrust::copy(vec0.begin(), vec0.end(), vec1.begin());
// Afterwards vec1 holds the same contents as vec0.

这么操作完之后,vec1中的数据将和vec0中的完全一致。

1.1.3.2 copy_n

// thrust::copy_n -- copy n elements, overload that takes an explicit
// execution policy.
//   exec   : execution policy object, passed as the first argument
//   first  : beginning of the source sequence
//   n      : number of elements to copy
//   result : beginning of the destination
// Fixed relative to the original listing: the function is `copy_n` (not
// `copy`), and the count parameter uses the template parameter `Size`
// (the original wrote the undeclared type `size`).
template<typename DerivedPolicy, typename InputIterator, typename Size, typename OutputIterator>
__host__ __device__ OutputIterator thrust::copy_n(
	const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
	InputIterator first,
	Size n,
	OutputIterator result
);

// thrust::copy_n -- same operation and parameters, without the execution
// policy argument.
template<typename InputIterator, typename Size, typename OutputIterator>
OutputIterator thrust::copy_n(
	InputIterator first,
	Size n,
	OutputIterator result
);

批量拷贝n个数据
例子:

// Number of elements to copy.
size_t n = 100;
thrust::device_vector<int> vec0(n);
thrust::device_vector<int> vec1(n);

// Copy the first n elements of vec0 into vec1.
// (Fixed: the count-based algorithm is thrust::copy_n; thrust::copy takes an
// iterator range, not a count.)
thrust::copy_n(thrust::device, vec0.begin(), n, vec1.begin());
// thrust::copy_n(vec0.begin(), n, vec1.begin());
// Afterwards vec1 holds the same contents as vec0.

1.1.3.3 swap_ranges

// thrust::swap_ranges -- exchange the data of two ranges, overload that takes
// an explicit execution policy.
//   exec            : execution policy object, passed as the first argument
//   [first1, last1) : the first range
//   first2          : beginning of the second range
template<typename DerivedPolicy, typename ForwardIterator1, typename ForwardIterator2>
__host__ __device__ ForwardIterator2 thrust::swap_ranges(
	const thrust::detail::execution_policy_base<DerivedPolicy> &exec, 
	ForwardIterator1 first1,
	ForwardIterator1 last1,
	ForwardIterator2 first2
);

// thrust::swap_ranges -- same operation and parameters, without the execution
// policy argument.
template<typename ForwardIterator1, typename ForwardIterator2>
ForwardIterator2 thrust::swap_ranges(
	ForwardIterator1 first1,
	ForwardIterator1 last1,
	ForwardIterator2 first2
);

一定范围内数据进行交换
例子:

// Two device vectors of two elements each.
thrust::device_vector<int> v1(2);
thrust::device_vector<int> v2(2);

// v1 = {1, 2}
v1[0] = 1;
v1[1] = 2;
// v2 = {3, 4}
v2[0] = 3;
v2[1] = 4;

// Exchange the contents of the two ranges element by element.
thrust::swap_ranges(thrust::device,
                    v1.begin(), v1.end(),
                    v2.begin());
// Equivalent call without an execution policy:
// thrust::swap_ranges(v1.begin(), v1.end(), v2.begin());
// Result: v1[0] = 3, v1[1] = 4, v2[0] = 1, v2[1] = 2

1.1.3.4 uninitialized_copy

template<typename DerivedPolicy, typename InputIterator, typename ForwardIterator>
__host__ __device__ ForwardIterator uninitialized_copy(
	const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
  	InputIterator first,
  	InputIterator last,
  	ForwardIterator result
);

template<typename InputIterator, typename ForwardIterator>
ForwardIterator uninitialized_copy(
  	InputIterator first,
  	InputIterator last,
  	ForwardIterator result
);

使用源数据对目标进行初始化
例子:

// Simple wrapper type with a converting constructor and no default constructor.
// (Fixed: the struct definition is now terminated with a semicolon.)
struct Int {
	__host__ __device__
	Int(int x) : val(x) {}
	int val;
};

const int N = 137;
Int val(46);
// Source: N copies of val.
thrust::device_vector<Int> input(N, val);
// Destination: storage for N elements obtained from device_malloc.
thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
// Initialize the destination from the source range.
thrust::uninitialized_copy(thrust::device, input.begin(), input.end(), array);
// thrust::uninitialized_copy(input.begin(), input.end(), array);
// Int x = array[i]
// x.val == 46, for all 0 <= i < N

1.1.3.5 uninitialized_copy_n

template<typename DerivedPolicy, typename InputIterator, typename Size, typename ForwardIterator>
__host__ __device__ ForwardIterator uninitialized_copy_n(
	const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
  	InputIterator first,
  	Size n,
  	ForwardIterator result
);

template<typename InputIterator, typename Size, typename ForwardIterator>
ForwardIterator uninitialized_copy_n(
  	InputIterator first,
  	Size n,
  	ForwardIterator result
);

使用源数据对目标中n个数据进行初始化
例子:

// Simple wrapper type with a converting constructor and no default constructor.
// (Fixed: the struct definition is now terminated with a semicolon.)
struct Int {
	__host__ __device__
	Int(int x) : val(x) {}
	int val;
};

const int N = 137;
Int val(46);
// Source: N copies of val.
thrust::device_vector<Int> input(N, val);
// Destination: storage for N elements obtained from device_malloc.
thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
// Initialize the first N elements of the destination from the source.
// (Fixed: the count-based algorithm is thrust::uninitialized_copy_n;
// thrust::uninitialized_copy takes an iterator range, not a count.)
thrust::uninitialized_copy_n(thrust::device, input.begin(), N, array);
// thrust::uninitialized_copy_n(input.begin(), N, array);
// Int x = array[i]
// x.val == 46, for all 0 <= i < N

至此大概简单介绍了一下 copying内部常见的一些api,如果有不对的地方,欢迎指出。

 类似资料: