本人 CUDA小白一枚,要是有什么不对,还望各位大佬指点。
本文及后面的几篇将分别从几个方面来大概阐述一下Thrust的一些接口。原来的网址在这里。
1.5 Partitioning
1.5.1 partition
template <typename DerivedPolicy, typename ForwardIterator, typename Predicate>
__host__ __device__ ForwardIterator thrust::partition(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
Predicate pred
);
template <typename ForwardIterator, typename Predicate>
ForwardIterator thrust::partition(
ForwardIterator first,
ForwardIterator last,
Predicate pred
);
template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate>
__host__ __device__ ForwardIterator thrust::partition(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred
);
template <typename ForwardIterator, typename InputIterator, typename Predicate>
ForwardIterator thrust::partition(
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred
);
按照谓词pred对[first, last)中的数据进行重排:使pred为true的元素排在使pred为false的元素之前,返回值指向第一个不满足pred的元素的位置。也可以指定对应的stencil,此时pred是针对stencil中的元素进行判断,然后按照判断结果对[first, last)中对应位置的数据进行重排。
例子:
// Unary predicate: returns true when x is even.
// Usable from both host and device code.
struct is_even {
  __host__ __device__
  bool operator()(const int &x) const {
    // x % 2 is 0 for every even value, including 0 and negative numbers.
    return (x % 2) == 0;
  }
};  // a struct definition must be terminated with ';'
// In-place partition on the host: elements satisfying is_even come first.
int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
const int N = sizeof(A) / sizeof(A[0]);
thrust::partition(thrust::host, A, A + N, is_even());
// Same call without an explicit execution policy:
// thrust::partition(A, A + N, is_even());
// A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}

// Stencil variant: is_even is evaluated on S[i], and B[i] is moved accordingly.
int B[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
thrust::partition(thrust::host, B, B + N, S, is_even());
// Same call without an explicit execution policy:
// thrust::partition(B, B + N, S, is_even());
// B is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
1.5.2 partition_copy
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator1, typename OutputIterator2, typename Predicate>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > thrust::partition_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
template <typename InputIterator, typename OutputIterator1, typename OutputIterator2, typename Predicate>
thrust::pair< OutputIterator1, OutputIterator2 > thrust::partition_copy(
InputIterator first,
InputIterator last,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename Predicate>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > thrust::partition_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename Predicate>
thrust::pair< OutputIterator1, OutputIterator2 > thrust::partition_copy(
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
在partition的基础上,增加了将结果分别保存的功能:满足pred的元素拷贝到out_true,不满足pred的元素拷贝到out_false,原序列保持不变。同样也可以指定stencil。
例子:
// Unary predicate: returns true when x is even.
// Usable from both host and device code.
struct is_even {
  __host__ __device__
  bool operator()(const int &x) const {
    // x % 2 is 0 for every even value, including 0 and negative numbers.
    return (x % 2) == 0;
  }
};  // a struct definition must be terminated with ';'
// partition_copy: the input stays untouched; matches go to `evens`, the rest to `odds`.
int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
const int N = sizeof(A) / sizeof(A[0]);
int result[10];
int *evens = &result[0];  // first half of result receives the "true" elements
int *odds = &result[5];   // second half of result receives the "false" elements
thrust::partition_copy(thrust::host, A, A + N, evens, odds, is_even());
// Same call without an explicit execution policy:
// thrust::partition_copy(A, A + N, evens, odds, is_even());
// A      is still {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
// result is now   {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
// evens  points at {2, 4, 6, 8, 10}
// odds   points at {1, 3, 5, 7, 9}

// Stencil variant: thrust::identity<int> is applied to S[i] to decide where B[i] goes.
int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
const int N2 = sizeof(B) / sizeof(B[0]);
int result2[10];
int *evens2 = &result2[0];
int *odds2 = &result2[5];
thrust::partition_copy(thrust::host, B, B + N2, S, evens2, odds2, thrust::identity<int>());
// B       is still {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
// S       is still {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}
// result2 is now   {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}
// evens2  points at {2, 4, 6, 8, 10}
// odds2   points at {1, 3, 5, 7, 9}
1.5.3 stable_partition
template <typename DerivedPolicy, typename ForwardIterator, typename Predicate>
__host__ __device__ ForwardIterator thrust::stable_partition(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
Predicate pred
);
template <typename ForwardIterator, typename Predicate>
ForwardIterator thrust::stable_partition(
ForwardIterator first,
ForwardIterator last,
Predicate pred
);
template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate>
__host__ __device__ ForwardIterator thrust::stable_partition(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred
);
template <typename ForwardIterator, typename InputIterator, typename Predicate>
ForwardIterator thrust::stable_partition(
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred
);
stable_partition与partition很像。不同的是,stable_partition可以保证重排之后,满足pred和不满足pred的两组元素各自内部的相对顺序与重排之前保持一致。
例子基本上跟1.5.1的类似,只是换了函数的名字,两个例子的结果也是一致的,所以这边就不再赘述。
1.5.4 stable_partition_copy
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator1, typename OutputIterator2, typename Predicate>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > thrust::stable_partition_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
template <typename InputIterator, typename OutputIterator1, typename OutputIterator2, typename Predicate>
thrust::pair< OutputIterator1, OutputIterator2 > thrust::stable_partition_copy(
InputIterator first,
InputIterator last,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename Predicate>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > thrust::stable_partition_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename Predicate>
thrust::pair< OutputIterator1, OutputIterator2 > thrust::stable_partition_copy(
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator1 out_true,
OutputIterator2 out_false,
Predicate pred
);
功能与partition_copy类似,不同的地方在于stable_partition_copy可以保证输出到out_true和out_false中的两组元素,各自内部的相对顺序与输入序列中的顺序保持一致。
例子与1.5.2中的较为类似,只是更换了函数的名字,所以不再赘述。
1.5.5 shuffle
template <typename DerivedPolicy, typename RandomIterator, typename URBG>
__host__ __device__ void thrust::shuffle(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
RandomIterator first,
RandomIterator last,
URBG && g
);
template <typename RandomIterator, typename URBG>
__host__ __device__ void thrust::shuffle(
RandomIterator first,
RandomIterator last,
URBG && g
);
根据一个随机engine,将[first, last)中的元素打乱。
例子:
// Shuffle A in place on the host using a default-constructed Thrust random engine.
thrust::default_random_engine g;
int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
const int N = sizeof(A) / sizeof(A[0]);
thrust::shuffle(thrust::host, A, A + N, g);
// A is now {6, 5, 8, 7, 2, 1, 4, 3, 10, 9}
1.5.6 shuffle_copy
// Overload with an explicit execution policy.
// Writes a shuffled copy of [first, last) to the range starting at `result`,
// using `g` as the random engine.
template <typename DerivedPolicy, typename RandomIterator, typename OutputIterator, typename URBG>
__host__ __device__ void thrust::shuffle_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
RandomIterator first,
RandomIterator last,
OutputIterator result,
URBG && g
);
// Overload without an execution policy; parameters have the same meaning as above.
template <typename RandomIterator, typename OutputIterator, typename URBG>
__host__ __device__ void thrust::shuffle_copy(
RandomIterator first,
RandomIterator last,
OutputIterator result,
URBG && g
);
相比于shuffle而言,多了一个输出结果的位置。
例子:
// Write a shuffled copy of A into result, using a default-constructed Thrust random engine.
thrust::default_random_engine g;
int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
const int N = sizeof(A) / sizeof(A[0]);  // 10 elements
int result[10];
thrust::shuffle_copy(thrust::host, A, A + N, result, g);
// result is now {6, 5, 8, 7, 2, 1, 4, 3, 10, 9}
1.5.7 copy_if
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate>
__host__ __device__ OutputIterator thrust::copy_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
Predicate pred
);
template <typename InputIterator, typename OutputIterator, typename Predicate>
OutputIterator thrust::copy_if(
InputIterator first,
InputIterator last,
OutputIterator result,
Predicate pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
__host__ __device__ OutputIterator thrust::copy_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator result,
Predicate pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
OutputIterator thrust::copy_if(
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator result,
Predicate pred
);
按照一定的pred,将[first, last)中满足条件的元素拷贝到result中。同样可以指定对应的stencil:此时pred对stencil中的元素进行判断,并将判断结果为true的位置所对应的[first, last)中的元素拷贝到result中。
例子:
// Unary predicate: returns true when x is even.
// Usable from both host and device code.
struct is_even {
  __host__ __device__
  // Takes a const reference (matching the earlier is_even definitions) so the
  // predicate can also be applied to const elements and temporaries.
  bool operator()(const int &x) const {
    return (x % 2) == 0;
  }
};  // a struct definition must be terminated with ';'
// copy_if: copy the elements of V that satisfy is_even into result.
int V[] = {-2, 0, -1, 0, 1, 2};
const int kCount = sizeof(V) / sizeof(V[0]);  // 6 elements
int result[4];
thrust::copy_if(thrust::host, V, V + kCount, result, is_even());
// Same call without an explicit execution policy:
// thrust::copy_if(V, V + 6, result, is_even());
// result is now {-2, 0, 0, 2}

// Stencil variant: data[i] is copied when is_even(stencil[i]) holds.
int stencil[6] = {-2, 0, -1, 0, 1, 2};
int data[6] = {0, 1, 2, 3, 4, 5};
int result1[4];
thrust::copy_if(thrust::host, data, data + 6, stencil, result1, is_even());
// Same call without an explicit execution policy:
// thrust::copy_if(data, data + 6, stencil, result1, is_even());
// result1 is now {0, 1, 3, 5}
1.5.8 remove
template <typename DerivedPolicy, typename ForwardIterator, typename T>
__host__ __device__ ForwardIterator thrust::remove(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
const T & value
);
template <typename ForwardIterator, typename T>
ForwardIterator thrust::remove(
ForwardIterator first,
ForwardIterator last,
const T & value
);
删除[first, last)中与value相等的值
例子:
// N was used without being declared in this snippet; define it explicitly.
const int N = 6;
int A[N] = {3, 1, 4, 1, 5, 9};
// remove returns a pointer to the new logical end of the sequence.
int *new_end = thrust::remove(thrust::host, A, A + N, 1);
// Same call without an explicit execution policy:
// int *new_end = thrust::remove(A, A + N, 1);
// [A, new_end) is now {3, 4, 5, 9}
1.5.9 remove_copy
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename T>
__host__ __device__ OutputIterator thrust::remove_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
const T & value
);
template <typename InputIterator, typename OutputIterator, typename T>
OutputIterator thrust::remove_copy(
InputIterator first,
InputIterator last,
OutputIterator result,
const T & value
);
与remove不同的是在原来的基础上会多一个结果保存的地址。
例子:
// remove_copy: copy every element of V that is not equal to 0 into result.
const int N = 6;
int V[N] = {-2, 0, -1, 0, 1, 2};
int result[N - 2];
thrust::remove_copy(thrust::host, V, V + N, result, 0);
// Same call without an explicit execution policy:
// thrust::remove_copy(V, V + N, result, 0);
// result is now {-2, -1, 1, 2}
1.5.10 remove_if
template <typename DerivedPolicy, typename ForwardIterator, typename Predicate>
__host__ __device__ ForwardIterator thrust::remove_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
Predicate pred
);
template <typename ForwardIterator, typename Predicate>
ForwardIterator thrust::remove_if(
ForwardIterator first,
ForwardIterator last,
Predicate pred
);
template <typename DerivedPolicy, typename ForwardIterator, typename InputIterator, typename Predicate>
__host__ __device__ ForwardIterator thrust::remove_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred
);
template <typename ForwardIterator, typename InputIterator, typename Predicate>
ForwardIterator thrust::remove_if(
ForwardIterator first,
ForwardIterator last,
InputIterator stencil,
Predicate pred
);
对[first, last)中的元素利用pred进行判断,如果符合条件,则删除。同样可以指定对应的stencil,对stencil中的元素进行判断来决定[first, last)中对应的元素是否需要删除。
// Unary predicate: returns true when x is even.
// Usable from both host and device code.
struct is_even {
  __host__ __device__
  // Takes a const reference (matching the earlier is_even definitions) so the
  // predicate can also be applied to const elements and temporaries.
  bool operator()(const int &x) const {
    return (x % 2) == 0;
  }
};  // a struct definition must be terminated with ';'
// remove_if: drop every element of A that satisfies is_even.
const int N = 6;
int A[N] = {1, 4, 2, 8, 5, 7};
int *new_end = thrust::remove_if(thrust::host, A, A + N, is_even());
// Same call without an explicit execution policy:
// int *new_end = thrust::remove_if(A, A + N, is_even());
// new_end is the returned end pointer; [A, new_end) is now {1, 5, 7}
1.5.10 remove_copy_if
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename Predicate>
__host__ __device__ OutputIterator thrust::remove_copy_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
Predicate pred
);
template <typename InputIterator, typename OutputIterator, typename Predicate>
OutputIterator thrust::remove_copy_if(
InputIterator first,
InputIterator last,
OutputIterator result,
Predicate pred
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
__host__ __device__ OutputIterator thrust::remove_copy_if(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator result,
Predicate pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator, typename Predicate>
OutputIterator thrust::remove_copy_if(
InputIterator1 first,
InputIterator1 last,
InputIterator2 stencil,
OutputIterator result,
Predicate pred
);
与remove_if不同的是多了一个copy的操作:不修改原序列,而是将不满足pred(即未被删除)的元素拷贝到result中。同样可以指定stencil。
// Unary predicate: returns true when x is even.
// Usable from both host and device code.
struct is_even {
  __host__ __device__
  // Takes a const reference (matching the earlier is_even definitions) so the
  // predicate can also be applied to const elements and temporaries.
  bool operator()(const int &x) const {
    return (x % 2) == 0;
  }
};  // a struct definition must be terminated with ';'
// remove_copy_if: copy the elements of V that do NOT satisfy is_even into result.
int V[] = {-2, 0, -1, 0, 1, 2};
const int N = sizeof(V) / sizeof(V[0]);  // 6 elements
int result[2];
thrust::remove_copy_if(thrust::host, V, V + N, result, is_even());
// Same call without an explicit execution policy:
// thrust::remove_copy_if(V, V + N, result, is_even());
// result is now {-1, 1}
1.5.11 unique
template <typename DerivedPolicy, typename ForwardIterator>
__host__ __device__ ForwardIterator thrust::unique(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last
);
template <typename ForwardIterator>
ForwardIterator thrust::unique(
ForwardIterator first,
ForwardIterator last
);
template <typename DerivedPolicy, typename ForwardIterator, typename BinaryPredicate>
__host__ __device__ ForwardIterator thrust::unique(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator first,
ForwardIterator last,
BinaryPredicate binary_pred
);
template <typename ForwardIterator, typename BinaryPredicate>
ForwardIterator thrust::unique(
ForwardIterator first,
ForwardIterator last,
BinaryPredicate binary_pred
);
删除连续的重复的元素,相同的元素,仅保留第一个。可以指定判断的条件。
例子:
// unique with the default equality: each run of equal neighbours collapses to its first element.
int A[7] = {1, 3, 3, 3, 2, 2, 1};
int *new_end = thrust::unique(thrust::host, A, A + 7);
// Same call without an explicit execution policy:
// int *new_end = thrust::unique(A, A + 7);
// [A, new_end) is now {1, 3, 2, 1}

// unique with an explicit binary predicate (thrust::equal_to<int>).
// A distinct variable name is used so both examples can live in the same scope.
int B[7] = {1, 3, 2, 3, 2, 1, 1};
int *new_end1 = thrust::unique(thrust::host, B, B + 7, thrust::equal_to<int>());
// Same call without an explicit execution policy:
// int *new_end1 = thrust::unique(B, B + 7, thrust::equal_to<int>());
// [B, new_end1) is now {1, 3, 2, 3, 2, 1}
1.5.12 unique_copy
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator>
__host__ __device__ OutputIterator thrust::unique_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename InputIterator, typename OutputIterator>
OutputIterator thrust::unique_copy(
InputIterator first,
InputIterator last,
OutputIterator result
);
template <typename DerivedPolicy, typename InputIterator, typename OutputIterator, typename BinaryPredicate>
__host__ __device__ OutputIterator thrust::unique_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator first,
InputIterator last,
OutputIterator result,
BinaryPredicate binary_pred
);
template <typename InputIterator, typename OutputIterator, typename BinaryPredicate>
OutputIterator thrust::unique_copy(
InputIterator first,
InputIterator last,
OutputIterator result,
BinaryPredicate binary_pred
);
与unique不同的是,多了一个copy的操作,删除重复之后的元素会保存在result中,可以指定判断的条件。
例子:
// unique_copy with the default equality: the de-duplicated sequence is written to B.
const int N = 7;
int A[N] = {1, 3, 3, 3, 2, 2, 1};
int B[N];
int *result_end = thrust::unique_copy(thrust::host, A, A + N, B);
// Same call without an explicit execution policy:
// int *result_end = thrust::unique_copy(A, A + N, B);
// [B, result_end) is now {1, 3, 2, 1}
// unique_copy with an explicit binary predicate: the output is written to D.
int C[N] = {1, 3, 3, 3, 2, 2, 1};
int D[N];
int *result_end1 = thrust::unique_copy(thrust::host, C, C + N, D, thrust::equal_to<int>());
// Same call without an explicit execution policy:
// int *result_end1 = thrust::unique_copy(C, C + N, D, thrust::equal_to<int>());
// [D, result_end1) is now {1, 3, 2, 1}
1.5.13 unique_by_key
template <typename DerivedPolicy, typename ForwardIterator1, typename ForwardIterator2>
__host__ __device__ thrust::pair< ForwardIterator1, ForwardIterator2 > thrust::unique_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator1 keys_first,
ForwardIterator1 keys_last,
ForwardIterator2 values_first
);
template <typename ForwardIterator1, typename ForwardIterator2>
thrust::pair< ForwardIterator1, ForwardIterator2 > thrust::unique_by_key(
ForwardIterator1 keys_first,
ForwardIterator1 keys_last,
ForwardIterator2 values_first
);
template <typename DerivedPolicy, typename ForwardIterator1, typename ForwardIterator2, typename BinaryPredicate>
__host__ __device__ thrust::pair< ForwardIterator1, ForwardIterator2 > thrust::unique_by_key(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
ForwardIterator1 keys_first,
ForwardIterator1 keys_last,
ForwardIterator2 values_first,
BinaryPredicate binary_pred
);
template <typename ForwardIterator1, typename ForwardIterator2, typename BinaryPredicate>
thrust::pair< ForwardIterator1, ForwardIterator2 > thrust::unique_by_key(
ForwardIterator1 keys_first,
ForwardIterator1 keys_last,
ForwardIterator2 values_first,
BinaryPredicate binary_pred
);
根据key来指定unique的操作,最终也同步保留values_first中的值。
// unique_by_key with the default equality: runs of equal keys collapse to their
// first entry, and the value paired with that first key is kept.
const int N = 7;
int A[N] = {1, 3, 3, 3, 2, 2, 1};  // keys
int B[N] = {9, 8, 7, 6, 5, 4, 3};  // values
thrust::pair<int*, int*> new_end = thrust::unique_by_key(thrust::host, A, A + N, B);
// Same call without an explicit execution policy:
// new_end = thrust::unique_by_key(A, A + N, B);
// keys   A are now {1, 3, 2, 1}
// values B are now {9, 8, 5, 3}

// Same operation with an explicit binary predicate on the keys.
thrust::equal_to<int> binary_pred;
int C[N] = {1, 3, 3, 3, 2, 2, 1};  // keys
int D[N] = {9, 8, 7, 6, 5, 4, 3};  // values
thrust::pair<int*, int*> new_end1 = thrust::unique_by_key(thrust::host, C, C + N, D, binary_pred);
// Same call without an explicit execution policy:
// new_end1 = thrust::unique_by_key(C, C + N, D, binary_pred);
// keys   C are now {1, 3, 2, 1}
// values D are now {9, 8, 5, 3}
1.5.14 unique_by_key_copy
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > thrust::unique_by_key_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_result,
OutputIterator2 values_result
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2>
thrust::pair< OutputIterator1, OutputIterator2 > thrust::unique_by_key_copy(
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_result,
OutputIterator2 values_result
);
template <typename DerivedPolicy, typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename BinaryPredicate>
__host__ __device__ thrust::pair< OutputIterator1, OutputIterator2 > thrust::unique_by_key_copy(
const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_result,
OutputIterator2 values_result,
BinaryPredicate binary_pred
);
template <typename InputIterator1, typename InputIterator2, typename OutputIterator1, typename OutputIterator2, typename BinaryPredicate>
thrust::pair< OutputIterator1, OutputIterator2 > thrust::unique_by_key_copy(
InputIterator1 keys_first,
InputIterator1 keys_last,
InputIterator2 values_first,
OutputIterator1 keys_result,
OutputIterator2 values_result,
BinaryPredicate binary_pred
);
与unique_by_key相比,多一个拷贝的过程,最终的key和value会被拷贝到keys_result和values_result中。
例子:
// unique_by_key_copy with the default equality: the de-duplicated keys and their
// values are written to separate output arrays.
const int N = 7;
int A[N] = {1, 3, 3, 3, 2, 2, 1};  // input keys
int B[N] = {9, 8, 7, 6, 5, 4, 3};  // input values
int C[N];                          // output keys
int D[N];                          // output values
thrust::pair<int*, int*> new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D);
// Same call without an explicit execution policy:
// new_end = thrust::unique_by_key_copy(A, A + N, B, C, D);
// output keys   C start with {1, 3, 2, 1}
// output values D start with {9, 8, 5, 3}

// Same operation with an explicit binary predicate on the keys.
thrust::equal_to<int> binary_pred;
int E[N] = {1, 3, 3, 3, 2, 2, 1};  // input keys
int F[N] = {9, 8, 7, 6, 5, 4, 3};  // input values
int G[N];                          // output keys
int H[N];                          // output values
thrust::pair<int*, int*> new_end1 = thrust::unique_by_key_copy(thrust::host, E, E + N, F, G, H, binary_pred);
// Same call without an explicit execution policy:
// new_end1 = thrust::unique_by_key_copy(E, E + N, F, G, H, binary_pred);
// output keys   G start with {1, 3, 2, 1}
// output values H start with {9, 8, 5, 3}