本人 CUDA小白一枚,要是有什么不对,还望各位大佬指点。
本文及后面的几篇将分别从几个方面来大概阐述一下Thrust的一些接口。原来的网址在这里。
1.9 Transformations
1.9.1 fill
template <typename DerivedPolicy, typename ForwardIterator, typename T>
__host__ __device__ void thrust::fill(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
const T & value
);
template <typename ForwardIterator, typename T>
__host__ __device__ void thrust::fill(
ForwardIterator first,
ForwardIterator last,
const T & value
);
将[first, last)填充value。
例子:
// Four device-resident ints that will all receive the same value.
const int fill_value = 137;
thrust::device_vector<int> v(4);
// Overload that takes an explicit execution policy.
thrust::fill(thrust::device, v.begin(), v.end(), fill_value);
// Overload without an execution policy; the remaining arguments are the same.
thrust::fill(v.begin(), v.end(), fill_value);
// v is now {137, 137, 137, 137}
1.9.2 fill_n
template <typename DerivedPolicy, typename OutputIterator, typename Size, typename T>
__host__ __device__ OutputIterator thrust::fill_n(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
OutputIterator first,
Size n,
const T & value
);
template <typename OutputIterator, typename Size, typename T>
__host__ __device__ OutputIterator thrust::fill_n(
OutputIterator first,
Size n,
const T & value
);
将[first, first + n) 填充value。
例子:
// Write 137 into the v.size() elements that start at v.begin().
thrust::device_vector<int> v(4);
const int fill_value = 137;
thrust::fill_n(thrust::device, v.begin(), v.size(), fill_value);
// Same call without an explicit execution policy:
// thrust::fill_n(v.begin(), v.size(), fill_value);
// v is now {137, 137, 137, 137}
1.9.3 uninitialized_fill
template <typename DerivedPolicy, typename ForwardIterator, typename T>
__host__ __device__ void thrust::uninitialized_fill(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
const T & x
);
template <typename ForwardIterator, typename T>
void thrust::uninitialized_fill(
ForwardIterator first,
ForwardIterator last,
const T & x
);
如果[first, last)指向的是一段尚未初始化的内存,该函数会在其中的每个位置上拷贝构造一份x。
例子:
// Minimal wrapper type whose constructor is callable from host and device code.
struct Int {
    __host__ __device__
    Int(int x) : val(x) {}
    int val;
};
// Number of elements to allocate; the example needs it declared before use.
const int N = 137;
// The exemplar object that is copied into every slot.
Int val(46);
// Storage for N Int objects; nothing has been constructed in it yet.
// NOTE(review): this example never releases `array`; real code should free it
// (thrust::device_free) once it is no longer needed.
thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
thrust::uninitialized_fill(thrust::device, array, array + N, val);
// Same call without an explicit execution policy:
// thrust::uninitialized_fill(array, array + N, val);
// Afterwards, for every i in [0, N): Int x = array[i]; x.val == 46
1.9.4 uninitialized_fill_n
template <typename DerivedPolicy, typename ForwardIterator, typename Size, typename T>
__host__ __device__ ForwardIterator thrust::uninitialized_fill_n(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
Size n,
const T & x
);
template <typename ForwardIterator, typename Size, typename T>
ForwardIterator thrust::uninitialized_fill_n(
ForwardIterator first,
Size n,
const T & x
);
原理同uninitialized_fill,只将[first, first + n)进行填充。
例子:
// Minimal wrapper type whose constructor is callable from host and device code.
struct Int {
__host__ __device__
Int(int x) : val(x) {}
int val;
};
...
// Number of elements to allocate and fill.
const int N = 137;
// The exemplar object that is copied into every slot.
Int val(46);
// Storage for N Int objects obtained from thrust::device_malloc.
thrust::device_ptr<Int> array = thrust::device_malloc<Int>(N);
thrust::uninitialized_fill_n(thrust::device, array, N, val);
// Same call without an explicit execution policy:
// thrust::uninitialized_fill_n(array, N, val);
// Afterwards, for every i in [0, N): Int x = array[i]; x.val == 46
1.9.5 for_each
template <typename DerivedPolicy, typename InputIterator, typename UnaryFunction>
__host__ __device__ InputIterator thrust::for_each(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
UnaryFunction f
);
template <typename InputIterator, typename UnaryFunction>
InputIterator thrust::for_each(
InputIterator first,
InputIterator last,
UnaryFunction f
);
对[first, last)中的每个元素分别执行f。
例子:
// Functor that prints the element it is invoked with, followed by a newline.
struct printf_functor {
    __host__ __device__
    void operator()(int value) {
        // printf inside a __device__ function needs code compiled for a GPU
        // with compute capability 2.0 or higher (nvcc --arch=sm_20).
        printf("%d\n", value);
    }
};
// Three device elements holding 0, 1, 2.
thrust::device_vector<int> d_vec(3);
d_vec[0] = 0;
d_vec[1] = 1;
d_vec[2] = 2;
printf_functor print_element;
thrust::for_each(thrust::device, d_vec.begin(), d_vec.end(), print_element);
// Same call without an explicit execution policy:
// thrust::for_each(d_vec.begin(), d_vec.end(), print_element);
// Final output:
// 0
// 1
// 2
1.9.6 for_each_n
template <typename DerivedPolicy, typename InputIterator, typename Size, typename UnaryFunction>
__host__ __device__ InputIterator thrust::for_each_n(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
Size n,
UnaryFunction f
);
template <typename InputIterator, typename Size, typename UnaryFunction>
InputIterator thrust::for_each_n(
InputIterator first,
Size n,
UnaryFunction f
);
针对[first, first + n)中的元素分别执行f。
例子:
// Functor that prints the element it is invoked with, followed by a newline.
struct printf_functor {
    __host__ __device__
    void operator()(int value) {
        // printf inside a __device__ function needs code compiled for a GPU
        // with compute capability 2.0 or higher (nvcc --arch=sm_20).
        printf("%d\n", value);
    }
};
// Three device elements holding 0, 1, 2.
thrust::device_vector<int> d_vec(3);
d_vec[0] = 0;
d_vec[1] = 1;
d_vec[2] = 2;
printf_functor print_element;
// Visit d_vec.size() elements starting at d_vec.begin().
thrust::for_each_n(thrust::device, d_vec.begin(), d_vec.size(), print_element);
// Same call without an explicit execution policy:
// thrust::for_each_n(d_vec.begin(), d_vec.size(), print_element);
// Final output:
// 0
// 1
// 2
1.9.7 replace
template <typename DerivedPolicy, typename ForwardIterator, typename T>
__host__ __device__ void thrust::replace(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
const T & old_value,
const T & new_value);
template <typename ForwardIterator, typename T>
void thrust::replace(
ForwardIterator first,
ForwardIterator last,
const T & old_value,
const T & new_value);
将[first, last)中的old_value,替换成new_value。
例子:
// Build A = {1, 2, 3, 1} on the device from a host array.
int h_A[4] = {1, 2, 3, 1};
thrust::device_vector<int> A(h_A, h_A + 4);
// Every element equal to old_value is overwritten with new_value.
const int old_value = 1;
const int new_value = 99;
thrust::replace(thrust::device, A.begin(), A.end(), old_value, new_value);
// Same call without an explicit execution policy:
// thrust::replace(A.begin(), A.end(), old_value, new_value);
// A is now {99, 2, 3, 99}
1.9.8 replace_if
template <typename DerivedPolicy, typename ForwardIterator, typename Predicate, typename T>
__host__ __device__ void thrust::replace_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
Predicate pred,
const T & new_value
);
template <typename ForwardIterator, typename Predicate, typename T>
void thrust::replace_if(
ForwardIterator first,
ForwardIterator last,
Predicate pred,
const T & new_value
);
template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
__host__ __device__ void thrust::replace_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred,
const T & new_value
);
template <typename ForwardIterator, typename InputIterator, typename Predicate, typename T>
void thrust::replace_if(
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred,
const T & new_value
);
相较于replace直接替换值,replace_if则是直接通过判断条件pred决定当前值是不是需要更新。
例子:
// Predicate: true for strictly negative values.
struct is_less_than_zero {
    __host__ __device__
    bool operator()(int value) {
        return value < 0;
    }
};
// Build A = {1, -3, 2, -1} on the device from a host array.
int h_A[4] = {1, -3, 2, -1};
thrust::device_vector<int> A(h_A, h_A + 4);
// Elements for which the predicate holds are overwritten with 0.
thrust::replace_if(thrust::device, A.begin(), A.end(), is_less_than_zero(), 0);
// Same call without an explicit execution policy:
// thrust::replace_if(A.begin(), A.end(), is_less_than_zero(), 0);
// A is now {1, 0, 2, 0}
1.9.9 replace_copy
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
__host__ __device__ OutputIterator thrust::replace_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
const T & old_value,
const T & new_value
);
template <typename InputIterator, typename OutputIterator, typename T>
OutputIterator thrust::replace_copy(
InputIterator first,
InputIterator last,
OutputIterator result,
const T & old_value,
const T & new_value
);
将[first, last)中的结果复制到result中,并且如果出现old_value,则替换成new_value
例子:
// Source A = {1, 2, 3, 1}, built on the device from a host array.
int h_A[4] = {1, 2, 3, 1};
thrust::device_vector<int> A(h_A, h_A + 4);
// Destination with the same number of elements as the source.
thrust::device_vector<int> B(4);
// Copy A into B, writing new_value wherever A holds old_value.
const int old_value = 1;
const int new_value = 99;
thrust::replace_copy(thrust::device, A.begin(), A.end(), B.begin(), old_value, new_value);
// Same call without an explicit execution policy:
// thrust::replace_copy(A.begin(), A.end(), B.begin(), old_value, new_value);
// B is now {99, 2, 3, 99}
1.9.10 replace_copy_if
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate, typename T>
__host__ __device__ OutputIterator thrust::replace_copy_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
Predicate pred,
const T & new_value
);
template <typename InputIterator, typename OutputIterator, typename Predicate, typename T>
OutputIterator thrust::replace_copy_if(
InputIterator first,
InputIterator last,
OutputIterator result,
Predicate pred,
const T & new_value
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
__host__ __device__ OutputIterator thrust::replace_copy_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator result,
Predicate pred,
const T & new_value
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate, typename T>
OutputIterator thrust::replace_copy_if(
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator result,
Predicate pred,
const T & new_value
);
将[first, last)中的元素拷贝到result中,如果符合pred条件,则拷贝的值替换成new_value。也可以指定stencil,使得pred判断的时候是针对stencil中对应位置的元素进行判断。
例子:
// Predicate: true for strictly negative values.
struct is_less_than_zero {
    __host__ __device__
    bool operator()(int x) {
        return x < 0;
    }
};
// A single predicate object is shared by both examples below.
is_less_than_zero pred;

// 1) Without a stencil: pred is evaluated on the elements of A themselves.
thrust::device_vector<int> A(4);
A[0] = 1;
A[1] = -3;
A[2] = 2;
A[3] = -1;
thrust::device_vector<int> B(4);
thrust::replace_copy_if(thrust::device, A.begin(), A.end(), B.begin(), pred, 0);
// Same call without an explicit execution policy:
// thrust::replace_copy_if(A.begin(), A.end(), B.begin(), pred, 0);
// B is now {1, 0, 2, 0}

// 2) With a stencil: pred is evaluated on S[i]; where it holds, D[i] receives
//    new_value (0), otherwise D[i] receives C[i].
thrust::device_vector<int> C(4);
C[0] = 10;
C[1] = 20;
C[2] = 30;
C[3] = 40;
thrust::device_vector<int> S(4);
S[0] = -1;
S[1] = 0;
S[2] = -1;
S[3] = 0;
thrust::device_vector<int> D(4);
// Argument order: input range, stencil, result, predicate, new value.
thrust::replace_copy_if(thrust::device, C.begin(), C.end(), S.begin(), D.begin(), pred, 0);
// Same call without an explicit execution policy:
// thrust::replace_copy_if(C.begin(), C.end(), S.begin(), D.begin(), pred, 0);
// D is now {0, 20, 0, 40}
1.9.11 adjacent_difference
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::adjacent_difference(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename BinaryFunction>
__host__ __device__ OutputIterator thrust::adjacent_difference(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
BinaryFunction binary_op
);
template <typename InputIterator, typename OutputIterator>
OutputIterator thrust::adjacent_difference(
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename InputIterator, typename OutputIterator, typename BinaryFunction>
OutputIterator thrust::adjacent_difference(
InputIterator first,
InputIterator last,
OutputIterator result,
BinaryFunction binary_op
);
针对[first, last)中的每个元素,计算当前元素与前一个元素的差,依次存放到result中对应的位置(第一个元素没有前驱,原样拷贝)。默认的操作是求差,也可以指定对应的binary_op。
例子:
// Input sequence, copied from a host array into device memory.
int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2};
thrust::device_vector<int> d_data(h_data, h_data + 8);

// 1) Default operation (difference): result[0] = data[0],
//    result[i] = data[i] - data[i - 1].
thrust::device_vector<int> d_result(8);
thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin());
// Same call without an explicit execution policy:
// thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin());
// d_result is now {1, 1, -1, 1, -1, 1, -1, 1}

// 2) Custom operation (plus): result[0] = data[0],
//    result[i] = data[i] + data[i - 1]. The output goes into d_result1 so the
//    first result is not overwritten.
thrust::device_vector<int> d_result1(8);
thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result1.begin(), thrust::plus<int>());
// Same call without an explicit execution policy:
// thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result1.begin(), thrust::plus<int>());
// d_result1 is now {1, 3, 3, 3, 3, 3, 3, 3}
1.9.12 generate
template <typename DerivedPolicy, typename ForwardIterator, typename Generator>
__host__ __device__ void thrust::generate(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
Generator gen
);
template <typename ForwardIterator, typename Generator>
void thrust::generate(
ForwardIterator first,
ForwardIterator last,
Generator gen
);
利用gen生成元素填充[first, last)
例子:
// Seed the C library generator so that the sequence is reproducible.
const unsigned int seed = 13;
srand(seed);
// Ten host-side ints, each assigned the result of one call to rand.
thrust::host_vector<int> v(10);
thrust::generate(thrust::host, v.begin(), v.end(), rand);
// Same call without an explicit execution policy:
// thrust::generate(v.begin(), v.end(), rand);
// v now holds values returned by rand
1.9.13 generate_n
// Overload that takes an explicit execution policy.
template <typename DerivedPolicy, typename OutputIterator, typename Size, typename Generator>
__host__ __device__ OutputIterator thrust::generate_n(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
OutputIterator first,
Size n,
Generator gen
);
// Overload without an execution policy.
template <typename OutputIterator, typename Size, typename Generator>
OutputIterator thrust::generate_n(
OutputIterator first,
Size n,
Generator gen
);
利用gen生成元素填充[first, first + n)
例子:
// Seed the C library generator so that the sequence is reproducible.
const unsigned int seed = 13;
srand(seed);
// Assign the result of rand to the first 10 elements starting at v.begin().
thrust::host_vector<int> v(10);
thrust::generate_n(thrust::host, v.begin(), 10, rand);
// Same call without an explicit execution policy:
// thrust::generate_n(v.begin(), 10, rand);
1.9.14 sequence
template <typename DerivedPolicy, typename ForwardIterator>
__host__ __device__ void thrust::sequence(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last
);
template <typename ForwardIterator>
void thrust::sequence(
ForwardIterator first,
ForwardIterator last
);
template <typename DerivedPolicy, typename ForwardIterator, typename T>
__host__ __device__ void thrust::sequence(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
T init
);
template <typename ForwardIterator, typename T>
void thrust::sequence(
ForwardIterator first,
ForwardIterator last,
T init
);
template <typename DerivedPolicy, typename ForwardIterator, typename T>
__host__ __device__ void thrust::sequence(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
T init,
T step
);
template <typename ForwardIterator, typename T>
void thrust::sequence(
ForwardIterator first,
ForwardIterator last,
T init,
T step);
将[first, last)填充序列,默认填充当前index - first。可以指定起始值init,也可以指定步长step。
例子:
// All three arrays hold N elements.
const int N = 10;

// 1) No init, no step: values start at 0 and grow by 1.
int A[N];
thrust::sequence(thrust::host, A, A + N);
// thrust::sequence(A, A + N);
// A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

// 2) init = 1: values start at 1 and grow by 1.
int B[N];
thrust::sequence(thrust::host, B, B + N, 1);
// thrust::sequence(B, B + N, 1);
// B is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}

// 3) init = 1, step = 3: values start at 1 and grow by 3.
int C[N];
thrust::sequence(thrust::host, C, C + N, 1, 3);
// thrust::sequence(C, C + N, 1, 3);
// C is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28}
1.9.15 tabulate
template <typename DerivedPolicy, typename ForwardIterator, typename UnaryOperation>
__host__ __device__ void thrust::tabulate(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
UnaryOperation unary_op
);
template <typename ForwardIterator, typename UnaryOperation>
void thrust::tabulate(
ForwardIterator first,
ForwardIterator last,
UnaryOperation unary_op
);
针对[first, last)中的元素,分别填充unary_op(当前index - first)的值。
例子:
// Element i of A receives negate(i), i.e. -i.
const int N = 10;
int A[N];
thrust::negate<int> negate_index;
thrust::tabulate(thrust::host, A, A + N, negate_index);
// Same call without an explicit execution policy:
// thrust::tabulate(A, A + N, negate_index);
// A is now {0, -1, -2, -3, -4, -5, -6, -7, -8, -9}
1.9.16 transform
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename UnaryFunction>
__host__ __device__ OutputIterator thrust::transform(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
UnaryFunction op
);
template <typename InputIterator, typename OutputIterator, typename UnaryFunction>
OutputIterator thrust::transform(
InputIterator first,
InputIterator last,
OutputIterator result,
UnaryFunction op
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryFunction>
__host__ __device__ OutputIterator thrust::transform(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
BinaryFunction op
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename BinaryFunction>
OutputIterator thrust::transform(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator result,
BinaryFunction op
);
将[first, last)中的元素按照一定的op将结果存放到result。如果指定两个元素组,则可以实现两个元素组之间的op。
例子:
// 1) Unary transform, in place: every element of data is negated.
int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
thrust::transform(thrust::host, data, data + 10, data, thrust::negate<int>());
// Same call without an explicit execution policy:
// thrust::transform(data, data + 10, data, thrust::negate<int>());
// data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8}

// 2) Binary transform: output[i] = input1[i] + input2[i].
int input1[6] = {-5, 0, 2, 3, 2, 4};
int input2[6] = { 3, 6, -2, 1, 2, 3};
int output[6];
thrust::transform(thrust::host, input1, input1 + 6, input2, output, thrust::plus<int>());
// Same call without an explicit execution policy:
// thrust::transform(input1, input1 + 6, input2, output, thrust::plus<int>());
// output is now {-2, 6, 0, 4, 4, 7}
1.9.17 transform_if
template <typename DerivedPolicy, typename InputIterator, typename ForwardIterator, typename UnaryFunction, typename Predicate>
__host__ __device__ ForwardIterator thrust::transform_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
ForwardIterator result,
UnaryFunction op,
Predicate pred
);
template <typename InputIterator, typename ForwardIterator, typename UnaryFunction, typename Predicate>
ForwardIterator thrust::transform_if(
InputIterator first,
InputIterator last,
ForwardIterator result,
UnaryFunction op,
Predicate pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename ForwardIterator, typename UnaryFunction, typename Predicate>
__host__ __device__ ForwardIterator thrust::transform_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
ForwardIterator result,
UnaryFunction op,
Predicate pred
);
template <typename InputIterator1, typename InputIterator2, typename ForwardIterator, typename UnaryFunction, typename Predicate>
ForwardIterator thrust::transform_if(
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
ForwardIterator result,
UnaryFunction op,
Predicate pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename InputIterator3, typename ForwardIterator, typename BinaryFunction, typename Predicate>
__host__ __device__ ForwardIterator thrust::transform_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
InputIterator3 stencil,
ForwardIterator result,
BinaryFunction binary_op,
Predicate pred
);
template <typename InputIterator1, typename InputIterator2, typename InputIterator3, typename ForwardIterator, typename BinaryFunction, typename Predicate>
ForwardIterator thrust::transform_if(
InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
InputIterator3 stencil,
ForwardIterator result,
BinaryFunction binary_op,
Predicate pred
);
对[first, last)中的元素用pred进行判断:符合条件的元素执行op并把结果写入result中对应的位置,不符合条件的位置保持不变。也可以指定stencil,此时pred针对stencil中对应位置的元素进行判断。同样地,也可以对两个元素组[first1, last1)和first2应用binary_op。
例子:
// Predicate: true when x % 2 is non-zero, i.e. for odd values of either sign.
struct is_odd {
    __host__ __device__
    bool operator()(int x) {
        return x % 2;
    }
};
thrust::negate<int> op;
// Used as the predicate on stencils: a stencil value converts to true when non-zero.
thrust::identity<int> identity;

// 1) No stencil: pred is evaluated on the input elements; odd values are
//    negated in place, the rest are left as they are.
int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
thrust::transform_if(thrust::host, data, data + 10, data, op, is_odd());
// Same call without an explicit execution policy:
// thrust::transform_if(data, data + 10, data, op, is_odd());
// data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8}

// 2) With a stencil: data1[i] is negated only where stencil[i] is non-zero.
//    This works on data1, a fresh copy of the original values, not on the
//    array already modified above.
int data1[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8};
int stencil[10] = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
thrust::transform_if(thrust::host, data1, data1 + 10, stencil, data1, op, identity);
// Same call without an explicit execution policy:
// thrust::transform_if(data1, data1 + 10, stencil, data1, op, identity);
// data1 is now {5, 0, -2, -3, -2, 4, 0, -1, -2, 8}

// 3) Two input ranges plus a stencil: output[i] = input1[i] + input2[i] where
//    stencil2[i] is non-zero. Other positions of output are not written, so
//    output is zero-initialised to make the final contents well defined.
int input1[6] = {-5, 0, 2, 3, 2, 4};
int input2[6] = { 3, 6, -2, 1, 2, 3};
int stencil2[6] = { 1, 0, 1, 0, 1, 0};
int output[6] = {0, 0, 0, 0, 0, 0};
thrust::plus<int> op1;
thrust::transform_if(thrust::host, input1, input1 + 6, input2, stencil2, output, op1, identity);
// Same call without an explicit execution policy:
// thrust::transform_if(input1, input1 + 6, input2, stencil2, output, op1, identity);
// output is now {-2, 0, 0, 0, 4, 0}