手撸golang 基本数据结构与算法 堆排序

原创
02/24 06:55
阅读数 19

缘起

最近阅读<<我的第一本算法书>>(【日】石田保辉;宫崎修一) 本系列笔记拟采用golang练习之

堆排序

堆排序的特点是利用了数据结构中的堆。
首先,在堆中存储所有的数据,并按降序来构建堆。
为了排序,需要再从堆中把数据一个个取出来。

堆排序一开始需要将n个数据存进堆里,所需时间为O(nlogn)。
每轮取出最大的数据并重构堆所需要的时间为O(logn)。
由于总共有n轮,所以重构后排序的时间也是O(nlog n)。
因此,整体来看堆排序的时间复杂度为O(nlog n)。

这样来看,堆排序的运行时间比之前讲到的冒泡排序、选择排序、插入排序的时间O(n^2)都要短。

摘自 <<我的第一本算法书>> 【日】石田保辉;宫崎修一

目标

  • 构造一个堆, 并测试堆排序的效率

设计

  • IHeap: 定义堆的接口
  • tArrayHeap
    • 基于数组的堆的实现.
    • 堆是一种特殊的二叉完全树: 父节点总是小于任意的子节点
    • 堆的父子节点的索引存在线性关系, 以0下标为例
      • parent.index = (node.index - 1) / 2
      • leftChild.index = node.index*2 + 1
      • rightChild.index = leftChild.index + 1
  • ISorter:
    • 定义排序接口
    • 定义值比较函数以兼容任意值类型
    • 通过调整比较函数可实现倒序输出
  • tHeapSort
    • 利用tArrayHeap进行堆排序
    • 先把整个数组push进heap
    • 然后逐个pop出来, 即可

单元测试

heap_sort_test.go, 测试过程与之前的冒泡, 选择, 插入等类似, 但测试数组扩大到10万元素.

package sorting

import (
	"fmt"
	"learning/gooop/sorting"
	"learning/gooop/sorting/heap_sort"
	"math/rand"
	"testing"
	"time"
)

func Test_HeapSort(t *testing.T) {
	fnAssertTrue := func(b bool, msg string) {
		if !b {
			t.Fatal(msg)
		}
	}

	reversed := false
	fnCompare := func(a interface{}, b interface{}) sorting.CompareResult {
		i1 := a.(int)
		i2 := b.(int)

		if i1 < i2 {
			if reversed {
				return sorting.GREATER
			} else {
				return sorting.LESS
			}
		} else if i1 == i2 {
			return sorting.EQUAL
		} else {
			if reversed {
				return sorting.LESS
			} else {
				return sorting.GREATER
			}
		}
	}

	fnTestSorter := func(sorter sorting.ISorter) {
		reversed = false

		// test simple array
		samples := []interface{} { 2,3,1,5,4,7,6 }
		sorter.Sort(samples, fnCompare)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 2 3 4 5 6 7]",  "expecting 1,2,3,4,5,6,7")
		t.Log("pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]")

		// test 10000 items sorting
		rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
		sampleCount := 100*1000
		t.Logf("prepare large array with %v items", sampleCount)
		samples = make([]interface{}, sampleCount)
		for i := 0;i < sampleCount;i++ {
			samples[i] = rnd.Intn(sampleCount*10)
		}

		t.Logf("sorting large array with %v items", sampleCount)
		t0 := time.Now().UnixNano()
		sorter.Sort(samples, fnCompare)
		cost := time.Now().UnixNano() - t0
		for i := 1;i < sampleCount;i++ {
			fnAssertTrue(fnCompare(samples[i-1], samples[i]) != sorting.GREATER, "expecting <=")
		}
		t.Logf("end sorting large array, cost = %v ms", cost / 1000000)

		// test 0-20
		sampleCount = 20
		t.Log("sorting 0-20")
		samples = make([]interface{}, sampleCount)
		for i := 0;i < sampleCount;i++ {
			for {
				p := rnd.Intn(sampleCount)
				if samples[p] == nil {
					samples[p] = i
					break
				}
			}
		}
		t.Logf("unsort = %v", samples)

		sorter.Sort(samples, fnCompare)
		t.Logf("sorted = %v", samples)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]", "expecting 0-20")
		t.Log("pass sorting 0-20")

		// test special
		samples = []interface{} {}
		sorter.Sort(samples, fnCompare)
		t.Log("pass sorting []")

		samples = []interface{} { 1 }
		sorter.Sort(samples, fnCompare)
		t.Log("pass sorting [1]")

		samples = []interface{} { 3,1 }
		sorter.Sort(samples, fnCompare)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 3]",  "expecting 1,3")
		t.Log("pass sorting [1 3]")

		reversed = true
		samples = []interface{} { 2, 3,1 }
		sorter.Sort(samples, fnCompare)
		fnAssertTrue(fmt.Sprintf("%v", samples) == "[3 2 1]",  "expecting 3,2,1")
		t.Log("pass sorting [3 2 1]")
	}

	t.Log("\ntesting HeapSort")
	fnTestSorter(heap_sort.HeapSort)
}

测试输出

10万元素排序只需数十毫秒, 确实比冒泡, 选择, 插入等排序有指数级的提升. 代价是多占用了一个堆的空间.

$ go test -v heap_sort_test.go 
=== RUN   Test_HeapSort
    heap_sort_test.go:109: 
        testing HeapSort
    heap_sort_test.go:48: pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]
    heap_sort_test.go:53: prepare large array with 100000 items
    heap_sort_test.go:59: sorting large array with 100000 items
    heap_sort_test.go:66: end sorting large array, cost = 67 ms
    heap_sort_test.go:70: sorting 0-20
    heap_sort_test.go:81: unsort = [4 15 1 13 19 14 11 12 9 8 3 7 16 18 2 10 17 6 0 5]
    heap_sort_test.go:84: sorted = [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
    heap_sort_test.go:86: pass sorting 0-20
    heap_sort_test.go:91: pass sorting []
    heap_sort_test.go:95: pass sorting [1]
    heap_sort_test.go:100: pass sorting [1 3]
    heap_sort_test.go:106: pass sorting [3 2 1]
--- PASS: Test_HeapSort (0.07s)
PASS
ok      command-line-arguments  0.076s

IHeap.go

定义堆的接口

package heap_sort

type IHeap interface {
	Size() int
	IsEmpty() bool
	IsNotEmpty() bool

	Push(value interface{})
	Pop() (error, interface{})
}

tArrayHeap

  • 基于数组的堆的实现.
  • 堆是一种特殊的二叉完全树: 父节点总是小于任意的子节点
  • 堆的父子节点的索引存在线性关系, 以0下标为例
    • parent.index = (node.index - 1) / 2
    • leftChild.index = node.index*2 + 1
    • rightChild.index = leftChild.index + 1
package heap_sort

import (
	"errors"
	"learning/gooop/sorting"
)

type tArrayHeap struct {
	comparator sorting.CompareFunction
	items []interface{}
	size int
	version int64
}

func newArrayHeap(comparator sorting.CompareFunction) IHeap {
	return &tArrayHeap{
		comparator: comparator,
		items: make([]interface{}, 0),
		size: 0,
		version: 0,
	}
}

func (me *tArrayHeap) Size() int {
	return me.size
}

func (me *tArrayHeap) IsEmpty() bool {
	return me.size <= 0
}

func (me *tArrayHeap) IsNotEmpty() bool {
	return !me.IsEmpty()
}

func (me *tArrayHeap) Push(value interface{}) {
	me.version++

	me.ensureSize(me.size + 1)
	me.items[me.size] = value
	me.size++

	me.shiftUp(me.size - 1)
	me.version++
}


func (me *tArrayHeap) ensureSize(size int) {
	for ;len(me.items) < size; {
		me.items = append(me.items, nil)
	}
}

func (me *tArrayHeap) parentOf(i int) int {
	return (i - 1) / 2
}

func (me *tArrayHeap) leftChildOf(i int) int {
	return i*2 + 1
}

func (me *tArrayHeap) rightChildOf(i int) int {
	return me.leftChildOf(i) + 1
}

func (me *tArrayHeap) last() (i int, v interface{}) {
	if me.IsEmpty() {
		return -1, nil
	}

	i = me.size - 1
	v = me.items[i]
	return i,v
}

func (me *tArrayHeap) shiftUp(i int) {
	if i <= 0 {
		return
	}
	v := me.items[i]

	pi := me.parentOf(i)
	pv := me.items[pi]

	if me.comparator(v, pv) == sorting.LESS {
		me.items[pi], me.items[i] = v, pv
		me.shiftUp(pi)
	}
}

func (me *tArrayHeap) Pop() (error, interface{}) {
	if me.IsEmpty() {
		return gNoMoreElementsError, nil
	}

	me.version++

	top := me.items[0]
	li, lv := me.last()
	me.items[0] = nil
	me.size--

	if me.IsEmpty() {
		return nil, top
	}

	me.items[0] = lv
	me.items[li] = nil

	me.shiftDown(0)
	me.version++

	return nil, top
}

func (me *tArrayHeap) shiftDown(i int) {
	pv := me.items[i]
	ok, ci, cv := me.minChildOf(i)
	if ok && me.comparator(cv, pv) == sorting.LESS {
		me.items[i], me.items[ci] = cv, pv
		me.shiftDown(ci)
	}
}

func (me *tArrayHeap) minChildOf(p int) (ok bool, i int, v interface{}) {
	li := me.leftChildOf(p)
	if li >= me.size {
		return false, 0, nil
	}
	lv := me.items[li]

	ri := me.rightChildOf(p)
	if ri >= me.size {
		return true, li, lv
	}
	rv := me.items[ri]

	if me.comparator(lv, rv) == sorting.LESS {
		return true, li, lv
	} else {
		return true, ri, rv
	}
}

var gNoMoreElementsError = errors.New("no more elements")

ISorter

  • 定义排序接口
  • 定义值比较函数以兼容任意值类型
  • 通过调整比较函数可实现倒序输出
package sorting

type ISorter interface {
	Sort(data []interface{}, comparator CompareFunction) []interface{}
}

type CompareFunction func(a interface{}, b interface{}) CompareResult

type CompareResult int
const LESS CompareResult = -1
const EQUAL CompareResult = 0
const GREATER CompareResult = 1

tHeapSort

  • 堆排序器, 实现ISorter接口
  • 利用tArrayHeap进行堆排序
  • 先把整个数组push进heap
  • 然后逐个pop出来, 即可
package heap_sort

import "learning/gooop/sorting"

type tHeapSort struct {
}

func newHeapSort() sorting.ISorter {
	return &tHeapSort{}
}


func (me *tHeapSort) Sort(data []interface{}, comparator sorting.CompareFunction) []interface{} {
	heap := newArrayHeap(comparator)
	for _,it := range data {
		heap.Push(it)
	}

	for i,_ := range data {
		e,v := heap.Pop()
		if e == nil {
			data[i] = v
		} else {
			panic(e)
		}
	}

	return data
}


var HeapSort = newHeapSort()

(end)

展开阅读全文
go
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部