## 从无重复大数组找TOP N元素的最优解说起 原

蓝猫163

1 题目

There is an array of 10000000 different int numbers. Find out its largest 100 elements. The implementation should be optimized for executing speed.

2 分析与解
（接下来的算法均以Java语言实现。）

2.1 堆排序思路

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**

• Implementation of finding top 100 elements out of a huge int array.
• There is an array of 10000000 different int numbers. Find out its largest 100
• elements. The implementation should be optimized for executing speed.
• Note: This is the third version of implementation, this time I make the best out
• of the heap sort algorithm by using a minimum heap. The heap maintains the top biggest
• numbers that guarantees the minimum number is removed every time a new number is added
• to the heap. It saves memory usage to the limit by just using an array which size is 101
• and a few temp elements. However, the performance is not as good as the bit map way but
• better than the multiple thread way.
• @author zhangxu04
*/
public class FindTopElements3 {

private static final int ARRAY_LENGTH = 10000000; // big array length

public static void main(String[] args) {
FindTopElements3 fte = new FindTopElements3();

``````// Get a array which is not in order and elements are not duplicate
int[] array = getShuffledArray(ARRAY_LENGTH);

// Find top 100 elements and print them by desc order in the console
long start = System.currentTimeMillis();
fte.findTop100(array);
long end = System.currentTimeMillis();
System.out.println("Costs " + (end - start) + "ms");``````

}

public void findTop100(int[] arr) {
MinimumHeap heap = new MinimumHeap(100);
for (Integer i : arr) {
if (heap.size() > 100) {
heap.deleteTop();
}
}
for (int i = 0; i < 100; i++) {
System.out.println(heap.deleteTop());
}
}

/**
• Get shuffled int array
• @return array not in order and elements are not duplicate
*/
private static int[] getShuffledArray(int len) {
System.out
.println("Start to generate test array... this may take several seconds.");
List list = new ArrayList (len);
for (int i = 0; i < len; i++) {
}
Collections.shuffle(list);

int[] ret = new int[len];
for (int i = 0; i < len; i++) {
ret[i] = list.get(i);
}
return ret;
}

}

class MinimumHeap {

``````int[] items;
int size;

public MinimumHeap(int size) {
items = new int[size + 1];
size = 0;
}

void shiftUp(int index) {
int intent = items[index];
while (index > 0) {
int pindex = (index - 1) / 2;
int parent = items[pindex];
if (intent < parent) {
items[index] = parent;
index = pindex;
} else {
break;
}
}
items[index] = intent;
}

void shiftDown(int index) {
int intent = items[index];
int leftIndex = 2 * index + 1;
while (leftIndex < size) {
int minChild = items[leftIndex];
int minIndex = leftIndex;

int rightIndex = leftIndex + 1;
if (rightIndex < size) {
int rightChild = items[rightIndex];
if (rightChild < minChild) {
minChild = rightChild;
minIndex = rightIndex;
}
}

if (minChild < intent) {
items[index] = minChild;
index = minIndex;
leftIndex = index * 2 + 1;
} else {
break;
}
}
items[index] = intent;
}

items[size++] = item;
shiftUp(size - 1);
}

public int deleteTop() {
if (size < 1) {
return 0;
}
int maxItem = items[0];
int lastItem = items[size - 1];
size--;
if (size < 1) {
return lastItem;
}
items[0] = lastItem;
shiftDown(0);
return maxItem;
}

public boolean isEmpty() {
return size < 1;
}

public int size() {
return size;
}

/**
* MinimumHeap main test
* @param args
*/
public static void main(String[] args) {
MinimumHeap heap = new MinimumHeap(7);

heap.deleteTop();
heap.deleteTop();

while (!heap.isEmpty()) {
System.out.println(heap.deleteTop());
}
}``````

}

1、元素互不重复

2、最快的速度，没有提及对于系统资源以及空间的要求

2.2 多线程分而治之策略

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**

• Implementation of finding top 100 elements out of a huge int array.
• There is an array of 10000000 different int numbers.
• Find out its largest 100 elements.
• The implementation should be optimized for executing speed.
• @author zhangxu04
*/
public class FindTopElements {

private static final int ARRAY_LENGTH = 10000000; // big array length
private static final int ELEMENT_NUM_PER_GROUP = 10000; // split big array into sub-array, this represents sub-array length
private static final int TOP_ELEMENTS_NUM = 100; // top elements number

private ExecutorService executorService;

private CompletionService completionService;

public FindTopElements() {
completionService = new ExecutorCompletionService (executorService);
}

/**
• Start from here :-)
• @param args
*/
public static void main(String[] args) {
FindTopElements findTopElements = new FindTopElements();

// Get a array which is not in order and elements are not duplicate
int[] array = getShuffledArray(ARRAY_LENGTH);

// Find top 100 elements and print them by desc order in the console
long start = System.currentTimeMillis();
findTopElements.findTop100(array);
long end = System.currentTimeMillis();
System.out.println("Costs " + (end - start) + "ms");
}

/**
• Leveraging concurrent components of JDK, we can deal small parts of the huge array concurrently.
• The huge array are split into several sub arrays which are submitted to a thread pool one by one.
• By using `CompletionService`, we can take out completed result from the pool as soon as possible,
• which avoid the block issue when getting future result through a future task list by using
• `ExcutorService` and `Future` class. Moreover, the can optimize the performance of
• the piece of code by processing the completed results once we get them, so the overall sort invocation will
• not be delayed to the final moment.
• /
private void findTop100(int[] arr) {
System.out.println("Start to compute.");
int groupNum = (ARRAY_LENGTH / ELEMENT_NUM_PER_GROUP);
System.out.println("Split " + ARRAY_LENGTH + " elements into " + groupNum + " groups");
for (int i = 0; i < groupNum; i++) {
int[] toBeSortArray = new int[ELEMENT_NUM_PER_GROUP];
System.arraycopy(arr, i
ELEMENT_NUM_PER_GROUP, toBeSortArray, 0, ELEMENT_NUM_PER_GROUP);
completionService.submit(new FindTop100(toBeSortArray));
}

try {
int[] overallArray = new int[TOP_ELEMENTS_NUM * groupNum];
for (int i = 0; i < groupNum; i++) {
System.arraycopy(completionService.take().get(), 0, overallArray, i * TOP_ELEMENTS_NUM, TOP_ELEMENTS_NUM);
}
Arrays.sort(overallArray);
for (int i = 1; i <= TOP_ELEMENTS_NUM; i++) {
System.out.println(overallArray[TOP_ELEMENTS_NUM * groupNum - i]);
}
System.out.println("Finish to output result.");
} catch (Exception e) {
e.printStackTrace();
}
executorService.shutdown();
}

/**
• Callable of finding top 100 elements
• The steps are as below:
• 1) Quick sort a array
• 2) Get reverse 100 elements and put them into a new array
• 3) return the new array
*/
private class FindTop100 implements Callable {

private int[] array;

public FindTop100(int[] array) {
this.array = array;
}

@Override
public int[] call() throws Exception {
int len = array.length;
Arrays.sort(array);
int[] result = new int[TOP_ELEMENTS_NUM];
int index = 0;
for (int i = 1; i <= TOP_ELEMENTS_NUM; i++) {
result[index++] = array[len - i];
}
return result;
}

}

/**
• Get shuffled int array
• @return array not in order and elements are not duplicate
*/
private static int[] getShuffledArray(int len) {
System.out.println("Start to generate test array... this may take several seconds.");
List list = new ArrayList (len);
for (int i = 0; i < len; i++) {
}
Collections.shuffle(list);

int[] ret = new int[len];
for (int i = 0; i < len; i++) {
ret[i] = list.get(i);
}
return ret;
}

}

2.3 位图数组思路

index=62/32=1

bit_index=62 mod 32 = 30

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**

• Implementation of finding top 100 elements out of a huge int array.
• There is an array of 10000000 different int numbers. Find out its largest 100
• elements. The implementation should be optimized for executing speed.
• Note: This is the second version of implementation, the previous one using
• thread pool provided by JDK concurrent toolkit is not efficient enough, the
• second version is an enhanced one based on bit map algorithm, which is estimated to
• have at least a 3 times faster and consume less memory usage.
• @author zhangxu04
*/
public class FindTopElements2 {

private static final int ARRAY_LENGTH = 10000000; // big array length

public static void main(String[] args) {
FindTopElements2 fte = new FindTopElements2(ARRAY_LENGTH + 1);

``````// Get a array which is not in order and elements are not duplicate
int[] array = getShuffledArray(ARRAY_LENGTH);

// Find top 100 elements and print them by desc order in the console
long start = System.currentTimeMillis();
fte.findTop100(array);
long end = System.currentTimeMillis();
System.out.println("Costs " + (end - start) + "ms");``````

}

private final int[] bitmap;

private final int size;

public FindTopElements2(final int size) {
this.size = size;
int len = ((size % 32) == 0) ? size / 32 : size / 32 + 1;
this.bitmap = new int[len];
}

private static int index(final int number) {
return number / 32;
}

private static int position(final int number) {
return number % 32;
}

private void adjustBitMap(final int index, final int position) {
int bit = bitmap[index] | (1 << position);
bitmap[index] = bit;
}

for (int i = 0; i < numArr.length; i++) {
}
}

}

public boolean getIndex(final int index) {
if (index > size) {
return false;
}

``````int bit = (bitmap[index(index)] >> position(index)) & 0x0001;
return (bit == 1);``````

}

private void findTop100(int[] arr) {
System.out.println("Start to compute.");
int[] result = new int[100];
int index = 0;
for (int i = bitmap.length - 1; i >= 0; i--) {
for (int j = 31; j >= 0; j--) {
if (((bitmap[i] >> j) & 0x0001) == 1) {
if (index == result.length) {
break;
}
result[index++] = ((i) * 32) + j ;
}
}
if (index == result.length) {
break;
}
}

``````for (int j = 0; j < result.length; j++) {
System.out.println(result[j]);
}
System.out.println("Finish to output result.");``````

}

/**
• Get shuffled int array
• @return array not in order and elements are not duplicate
*/
private static int[] getShuffledArray(int len) {
System.out.println("Start to generate test array... this may take several seconds.");
List list = new ArrayList (len);
for (int i = 0; i < len; i++) {
}
Collections.shuffle(list);

int[] ret = new int[len];
for (int i = 0; i < len; i++) {
ret[i] = list.get(i);
}
return ret;
}

}

3 总结

### 蓝猫163

PERL删除数组元素的多种方法

2013/04/23
0
0
java-生成无重随机序列-10x速~

alvinte
2013/03/01
0
2
169. Majority Element。

Given an array of size n, find the majority element. The majority element is the element that appears more than ⌊ n/2 ⌋ times. You may assume that the array is non-empty and ......

Leafage_M
2018/01/06
0
0
LeetCode-Two Sum

2018/04/01
0
0

2016/11/26
214
0

《货币商人》读后感作文选登3800字

《货币商人》读后感作文选登3800字： 领导之法、管理之术的大智慧与小技巧（宝安支行纪委书记葛希） 非常感谢夏书记向我们推荐了这本《货币商人》。这本书我读第一遍时惊现它像一个宝藏，蕴藏...

32分钟前
1
0

33分钟前
2
0

1.Dubbo是什么？ Dubbo 是一个分布式、高性能、透明化的 RPC 服务框架，提供服务自动注册、自动发现等高效服务治理方案， 可以和 Spring 框架无缝集成。 RPC 指的是远程调用协议，也就是说两...

mikechen优知
57分钟前
2
0

58分钟前
1
0

OS: liunx version: centos7.x date: 2019-01-18 1. cd / : 进入服务器根目录 2. cd .. : 进入当前目录的上一级 3. ls : 显示当前目录下的所有文件夹或文件(list的缩写) 4. ip addr : 展示服...

1
0