对于一个无序的数组,怎样找到其中第k大的数呢?下面总结几种方法。
使用常见的归并排序、堆排序等算法对数组进行排序,然后找到第k大的数。排序算法的时间复杂度为O(nlogn),所以算法总的时间复杂度为O(nlogn)。
// Simple C++ program to find k'th biggest element
#include<iostream>
#include<algorithm>
using namespace std;
// Function to return k'th biggest element in a given array
int kthBiggest(int arr[], int n, int k)
{
// Sort the given array
sort(arr, arr+n);
// Return k'th element in the sorted array
return arr[n+k-1];
}
// Driver program to test above methods
int main()
{
int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20};
int n = (a, sizeof(a) / sizeof(int));
cout << "K'th biggest element is " << kthBiggest(arr, n, 2);
return 0;
}
可以使用最大堆对数组建立堆,只是需要找到第k大的数所以没有必要像直接排序方法中那样对所有数进行排序。建立好最大队后每次都提取出最大的数,提取 k 次就得到了第 k 大的数。建立最大堆的时间复杂度为O(n),提取最大数后每次调整最大队时间复杂为O(logn),所以总的时间复杂度为O(n+k*logn)。
// A C++ program to find k'th smallest element using min heap
#include<iostream>
#include<climits>
using namespace std;
// Prototype of a utility function to swap two integers
void swap(int *x, int *y);
// A class for Max Heap
class MaxHeap
{
int *harr; // pointer to array of elements in heap
int capacity; // maximum possible size of max heap
int heap_size; // Current number of elements in max heap
public:
MaxHeap(int a[], int size); // Constructor
void maxHeapify(int i); //To maxHeapify subtree rooted with index i
int parent(int i) { return (i - 1) / 2; }
int left(int i) { return (2 * i + 1); }
int right(int i) { return (2 * i + 2); }
int extractMax(); // extracts root (maximum) element
int getMax() { return harr[0]; } // Returns maximum
// to replace root with new node x and heapify() new root
void replaceMax(int x) { harr[0] = x; maxHeapify(0); }
};
MaxHeap::MaxHeap(int a[], int size)
{
heap_size = size;
harr = a; // store address of array
int i = (heap_size - 1) / 2;
while (i >= 0)
{
maxHeapify(i);
i--;
}
}
// Method to remove maximum element (or root) from max heap
int MaxHeap::extractMax()
{
if (heap_size == 0)
return INT_MAX;
// Store the maximum vakue.
int root = harr[0];
// If there are more than 1 items, move the last item to root
// and call heapify.
if (heap_size > 1)
{
harr[0] = harr[heap_size - 1];
maxHeapify(0);
}
heap_size--;
return root;
}
// A recursive method to heapify a subtree with root at given index
// This method assumes that the subtrees are already heapified
void MaxHeap::maxHeapify(int i)
{
int l = left(i);
int r = right(i);
int largest = i;
if (l < heap_size && harr[l] > harr[i])
largest = l;
if (r < heap_size && harr[r] > harr[largest])
largest = r;
if (largest != i)
{
swap(&harr[i], &harr[largest]);
maxHeapify(largest);
}
}
// A utility function to swap two elements
void swap(int *x, int *y)
{
int temp = *x;
*x = *y;
*y = temp;
}
// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
// Build a heap of all n numbers
MaxHeap mh(arr, n);
int num = 0;
// every time extract biggest number , k times
for (int i = 0; i < k; i++)
num = mh.extractMax();
return num;
}
// Driver program to test above methods
int main()
{
int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
int n = (a, sizeof(a) / sizeof(int));
cout << "K'th biggest element is " << kthBiggest(a, n, 3);
return 0;
}
由于是要找 k 个最大的数,所以没有必要对所有数进行完整的排序。每次只保留 k 个当前最大的数就可以,然后每次对新来的元素跟当前 k 个树中最小的数比较,新元素大的话则插入到数组中,否则跳过。循环结束后数组中最小的数即是我们要找到第 k 大的数。
时间复杂度 (n-k)logk
// A C++ program to find k'th smallest element using min heap
#include<iostream>
using namespace std;
void print_array(int a[], int low, int high)
{
for (int i = low; i <= high; i++){
cout << a[i] << " ";
}
cout << endl;
}
void insert(int a[], int low, int high, int num)
{
int i = high-1;
while (i >= low && a[i] < num){
a[i + 1] = a[i];
i--;
}
a[i + 1] = num;
}
void insertSort(int a[], int low, int high)
{
for (int i = low + 1; i <= high; i++)
{
int temp = a[i];
int j = i - 1;
while (j >= low && a[j] < temp)
{
a[j + 1] = a[j];
j--;
}
a[j + 1] = temp; //do not swap every time
}
}
// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
int *a = new int[k];
for (int i = 0; i < k; i++){
a[i] = arr[i];
}
insertSort(a, 0, k - 1);
for (int i = k; i < n ; i++){
if (arr[i]>a[k - 1]) insert(a, 0, k-1, arr[i]);
}
int num = a[k - 1];
delete[] a;
return num;
}
// Driver program to test above methods
int main()
{
int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
int n = (a, sizeof(a) / sizeof(int));
print_array(a, 0, n-1);
cout << "K'th biggest element is " << kthBiggest(a, n, 3);
return 0;
}
上一方法中的临时数组可以替换为最小堆,来找到数组中k个最大的数。先对前 k 个数进行建堆,堆顶为当前k个数组中最小的元素。时间复杂度O(k)。
然后将第 k+1…n 个数依次和堆顶元素比较,如果新元素大于堆顶元素,则将它作为新的堆顶元素并调整最小堆,否则再处理下一个数。这样每一次循环都将保证最小堆中的k个数为当前最大的 k 个数。 时间复杂度O((n-k)*logk)。
循环结束后,堆顶元素即是我们要找的第 k 大的数。
算法总的时间复杂度是O(k+(n-k)*logk)。
// A C++ program to find k'th smallest element using min heap
#include<iostream>
#include<climits>
using namespace std;
// Prototype of a utility function to swap two integers
void swap(int *x, int *y);
// A class for min Heap
class MinHeap
{
int *harr; // pointer to array of elements in heap
int capacity; // maximum possible size of max heap
int heap_size; // Current number of elements in max heap
public:
MinHeap(int a[], int size); // Constructor
void MinHeapify(int i); //To minHeapify subtree rooted with index i
int parent(int i) { return (i - 1) / 2; }
int left(int i) { return (2 * i + 1); }
int right(int i) { return (2 * i + 2); }
int extractMin(); // extracts root (minimum) element
int getMin() { return harr[0]; } // Returns minimum
// to replace root with new node x and heapify() new root
void replaceMax(int x) { harr[0] = x; MinHeapify(0); }
};
MinHeap::MinHeap(int a[], int size)
{
heap_size = size;
harr = a; // store address of array
int i = (heap_size - 1) / 2;
while (i >= 0)
{
MinHeapify(i);
i--;
}
}
// Method to remove minimum element (or root) from min heap
int MinHeap::extractMin()
{
if (heap_size == 0)
return INT_MAX;
// Store the minimum vakue.
int root = harr[0];
// If there are more than 1 items, move the last item to root
// and call heapify.
if (heap_size > 1)
{
harr[0] = harr[heap_size - 1];
MinHeapify(0);
}
heap_size--;
return root;
}
// A recursive method to heapify a subtree with root at given index
// This method assumes that the subtrees are already heapified
void MinHeap::MinHeapify(int i)
{
int l = left(i);
int r = right(i);
int smallest = i;
if (l < heap_size && harr[l] < harr[i])
smallest = l;
if (r < heap_size && harr[r] < harr[smallest])
smallest = r;
if (smallest != i)
{
swap(&harr[i], &harr[smallest]);
MinHeapify(smallest);
}
}
// A utility function to swap two elements
void swap(int *x, int *y)
{
int temp = *x;
*x = *y;
*y = temp;
}
// Function to return k'th largest element in a given array
int kthBiggest(int arr[], int n, int k)
{
// Build a heap of first k elements: O(k) time
MinHeap mh(arr, k);
// Process remaining n-k elements. If current element is
// bigger than root, replace root with current element
for (int i = k; i < n; i++)
if (arr[i] > mh.getMin())
mh.replaceMax(arr[i]);
// Return root
return mh.getMin();
}
// Driver program to test above methods
int main()
{
int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
int n = (a, sizeof(a) / sizeof(int));
cout << "K'th biggest element is " << kthBiggest(a, n, 3);
return 0;
}
回忆一下快排算法,每次都根据主元将数组划分为两部分,一部分数全部比主元要小,另一部分全部比主元要大,划分完之后可以得到主元的位置,也就是可以知道主元是数组中第多大的数。那么可以根据此信息,来找到第 k 大的数,每次都只对划分后的一部分数进行处理。如果主元刚好是第 k 大的数则直接返回主元就可以了。否则更具主元的位置来缩小查找空间,迭代查找。
为了得到期望的时间复杂度为线性时间复杂度,快速排序中的主元的选择使用随机数。
该算法的期望时间复杂度为O(n)
// C++ implementation of randomized quickSelect
#include<iostream>
#include<climits>
#include<cstdlib>
using namespace std;
int randomPartition(int arr[], int l, int r);
// This function returns k'th biggest element in arr[l..r] using
// QuickSort based method. ASSUMPTION: ELEMENTS IN ARR[] ARE DISTINCT
int kthBiggest(int arr[], int l, int r, int k)
{
// If k is smaller than number of elements in array
if (k > 0 && k <= r - l + 1)
{
// Partition the array around a random element and
// get position of pivot element in sorted array
int pos = randomPartition(arr, l, r);
// If position is same as k
if (pos - l == k - 1)
return arr[pos];
if (pos - l > k - 1) // If position is more, recur for left subarray
return kthBiggest(arr, l, pos - 1, k);
// Else recur for right subarray
return kthBiggest(arr, pos + 1, r, k - pos + l - 1);
}
// If k is more than number of elements in array
return INT_MIN;
}
void swap(int *a, int *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
// Standard partition process of QuickSort(). It considers the last
// element as pivot and moves all smaller element to right of it and
// greater elements to left. This function is used by randomPartition()
int partition(int arr[], int l, int r)
{
int x = arr[r], i = l;
for (int j = l; j <= r - 1; j++)
{
if (arr[j] > x)
{
swap(&arr[i], &arr[j]);
i++;
}
}
swap(&arr[i], &arr[r]);
return i;
}
// Picks a random pivot element between l and r and partitions
// arr[l..r] arount the randomly picked element using partition()
int randomPartition(int arr[], int l, int r)
{
int n = r - l + 1;
int pivot = rand() % n;
swap(&arr[l + pivot], &arr[r]);
return partition(arr, l, r);
}
// Driver program to test above methods
int main()
{
int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20 };
int n = (a, sizeof(a) / sizeof(int));
cout << "K'th biggest element is " << kthBiggest(a, 0, n - 1, 2);
return 0;
}
算法导论中介绍了一种方法,能保证最坏情况下该算法依旧是线性时间复杂度。
算法步骤:
1.将数组的 n 个数划分为 n/5 组,每组 5 个元素。
2.寻找这 n/5 组中每一组的中位数:首先对每组元素进行插入排序,然后确定每组有序元素的中位数。
3.对第 2 步中找出的 n/5 个中位数,递归调用 BFPRT 查找算法本身来找到这 n/5 个数的中位数
x
。
4.利用修改过的划分函数,按中位数的中位数
5.如果
可以用一个递归式来得到算法的时间复杂度。令T(n)为时间复杂度,则1、2、4都需要O(n)时间,步骤三需要T(n/5),步骤5最多需要 T(7n/10+6),所以递归式:
T(n) <= T(n/5)+T(7n/10)+O(n)
可以求得T(n) = O(n)
//implement max heap sort
#include "iostream"
#include "math.h"
void print_array(int a[], int beg, int end)
{
for (int i = beg; i <= end; i++)
std::cout << a[i] << " ";
std::cout << std::endl;
}
// partition and rank from big to small
// ran partition use the a[pivot] as pivot
// and return the new index of the a[pivot]
int partition(int a[], int low, int high, int pivot)
{
std::swap(a[high], a[pivot]);
//the index of the first place for replaced
int index_first_nosmall = low;
for (int i = low; i < high; i++)
{
if (a[i] > a[high])
{
std::swap(a[i], a[index_first_nosmall++]);
}
}
std::swap(a[high], a[index_first_nosmall]);
return index_first_nosmall;
}
//insert sort, from big to small
void InsertSort(int a[], int low, int high)
{
for (int i = low+1; i <= high; i++)
{
int temp = a[i];
int j = i - 1;
while (j >= low && a[j] < temp)
{
a[j + 1] = a[j];
j--;
}
a[j+1] = temp; //do not swap every time
}
}
//insert sort
void InsertSort_Lite(int a[], int low, int high)
{
for (int i = low + 1; i <= high; i++)
{
int j = i - 1;
while (j >= low && a[j] < a[j+1])
{
std::swap(a[j], a[j+1]);
j--;
}
}
}
int Find_k_Biggest(int a[], int low, int high, int k)
{
std::cout << "print_array(a, low, high):" << low << " , " << high <<" k: "<<k<< std::endl;
print_array(a, low, high);
if (high + 1 - low <= 5)
{
InsertSort(a, low, high);
return a[low + k - 1];
}
int j = low;
for (int i = 0; i <= high; i+=5)
{
int end = (i + 4 <= high ? (i + 4) : high);
InsertSort(a, i, end);
print_array(a, i, end);
std::swap(a[j++], a[(i+end)>>1]);
if (end != i + 4) break;
}
print_array(a, low, high);
//find the median of the medians
int pivot = (low + j -1) >> 1;
Find_k_Biggest(a, low, j-1, pivot+1-low);
//use pivot to partition the array
int resulted_pivot = partition(a, low, high, pivot);
int current_rank = resulted_pivot + 1 - low;
if (current_rank == k) return a[resulted_pivot];
else if (resulted_pivot < k) return Find_k_Biggest(a, resulted_pivot+1, high, k-resulted_pivot);
else return Find_k_Biggest(a, low, resulted_pivot - 1, k);
}
int main()
{
int a[] = { 8, 5, -4, -6, 9, 3, 7, -1, 2, 0, 15, -11, -13, 14, 12, 20};
int n = (a, sizeof(a) / sizeof(int));
print_array(a, 0, n-1);
std::cout << Find_k_Biggest(a, 0, n - 1, 2) << std::endl;;
print_array(a, 0, n-1);
return 0;
}
http://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array/
http://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array-set-2-expected-linear-time/
http://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array-set-3-worst-case-linear-time/