Python中的高级数据结构

Python中的高级数据结构

Python中的高级数据结构
• 发表于 2年前
• 阅读 116
• 收藏 4
• 评论 0

数据结构

1. Collections

collections模块包含了内建类型之外的一些有用的工具，例如Counter、defaultdict、OrderedDict、deque以及nametuple。其中Counter、deque以及defaultdict是最常用的类。

1.1 Counter()

 1 2 3 4 5 fromcollectionsimportCounter   li=["Dog","Cat","Mouse",42,"Dog",42,"Cat","Dog"] a=Counter(li) printa# Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1})

 1 2 3 4 5 6 7 fromcollectionsimportCounter   li=["Dog","Cat","Mouse",42,"Dog",42,"Cat","Dog"] a=Counter(li) printa# Counter({'Dog': 3, 42: 2, 'Cat': 2, 'Mouse': 1})   printlen(set(li))# 4

 1 2 3 4 5 6 7 8 9 10 fromcollectionsimportCounter   li=["Dog","Cat","Mouse","Dog","Cat","Dog"] a=Counter(li)   printa# Counter({'Dog': 3, 'Cat': 2, 'Mouse': 1})   print"{0} : {1}".format(a.values(),a.keys()) # [1, 3, 2] : ['Mouse', 'Dog', 'Cat']   print(a.most_common(3))# [('Dog', 3), ('Cat', 2), ('Mouse', 1)]

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 importre fromcollectionsimportCounter   string="""   Lorem ipsum dolor sit amet, consectetur     adipiscing elit. Nunc ut elit id mi ultricies     adipiscing. Nulla facilisi. Praesent pulvinar,     sapien vel feugiat vestibulum, nulla dui pretium orci,     non ultricies elit lacus quis ante. Lorem ipsum dolor     sit amet, consectetur adipiscing elit. Aliquam     pretium ullamcorper urna quis iaculis. Etiam ac massa     sed turpis tempor luctus. Curabitur sed nibh eu elit     mollis congue. Praesent ipsum diam, consectetur vitae     ornare a, aliquam a nunc. In id magna pellentesque     tellus posuere adipiscing. Sed non mi metus, at lacinia     augue. Sed magna nisi, ornare in mollis in, mollis     sed nunc. Etiam at justo in leo congue mollis.     Nullam in neque eget metus hendrerit scelerisque     eu non enim. Ut malesuada lacus eu nulla bibendum     id euismod urna sodales.  """   words=re.findall(r'\w+', string)#This finds words in the document   lower_words=[word.lower()forwordinwords]#lower all the words   word_counts=Counter(lower_words)#counts the number each time a word appears printword_counts   # Counter({'elit': 5, 'sed': 5, 'in': 5, 'adipiscing': 4, 'mollis': 4, 'eu': 3, # 'id': 3, 'nunc': 3, 'consectetur': 3, 'non': 3, 'ipsum': 3, 'nulla': 3, 'pretium': # 2, 'lacus': 2, 'ornare': 2, 'at': 2, 'praesent': 2, 'quis': 2, 'sit': 2, 'congue': 2, 'amet': 2, # 'etiam': 2, 'urna': 2, 'a': 2, 'magna': 2, 'lorem': 2, 'aliquam': 2, 'ut': 2, 'ultricies': 2, 'mi': 2, # 'dolor': 2, 'metus': 2, 'ac': 1, 'bibendum': 1, 'posuere': 1, 'enim': 1, 'ante': 1, 'sodales': 1, 'tellus': 1, # 'vitae': 1, 'dui': 1, 'diam': 1, 'pellentesque': 1, 'massa': 1, 'vel': 1, 'nullam': 1, 'feugiat': 1, 'luctus': 1, # 'pulvinar': 1, 'iaculis': 1, 'hendrerit': 1, 'orci': 1, 'turpis': 1, 'nibh': 1, 'scelerisque': 1, 'ullamcorper': 1, # 'eget': 1, 'neque': 1, 'euismod': 1, 'curabitur': 1, 'leo': 1, 'sapien': 1, 'facilisi': 1, 'vestibulum': 1, 'nisi': 1, # 'justo': 1, 'augue': 1, 'tempor': 1, 'lacinia': 1, 'malesuada': 1})

1.2 Deque

Deque支持线程安全的，经过优化的append和pop操作，在队列两端的相关操作都能够达到近乎O(1)的时间复杂度。虽然list也支持类似的操作，但是它是对定长列表的操作表现很不错，而当遇到pop(0)和insert(0, v)这样既改变了列表的长度又改变其元素位置的操作时，其复杂度就变为O(n)了。

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 importtime fromcollectionsimportdeque   num=100000   defappend(c):     foriinrange(num):         c.append(i)   defappendleft(c):     ifisinstance(c, deque):         foriinrange(num):             c.appendleft(i)     else:         foriinrange(num):             c.insert(0, i) defpop(c):     foriinrange(num):         c.pop()   defpopleft(c):     ifisinstance(c, deque):         foriinrange(num):             c.popleft()     else:         foriinrange(num):             c.pop(0)   forcontainerin[deque,list]:     foroperationin[append, appendleft, pop, popleft]:         c=container(range(num))         start=time.time()         operation(c)         elapsed=time.time()-start         print"Completed {0}/{1} in {2} seconds: {3} ops/sec".format(               container.__name__, operation.__name__, elapsed, num/elapsed)   # Completed deque/append in 0.0250000953674 seconds: 3999984.74127 ops/sec # Completed deque/appendleft in 0.0199999809265 seconds: 5000004.76838 ops/sec # Completed deque/pop in 0.0209999084473 seconds: 4761925.52225 ops/sec # Completed deque/popleft in 0.0199999809265 seconds: 5000004.76838 ops/sec # Completed list/append in 0.0220000743866 seconds: 4545439.17637 ops/sec # Completed list/appendleft in 21.3209998608 seconds: 4690.21155917 ops/sec # Completed list/pop in 0.0240001678467 seconds: 4166637.52682 ops/sec # Completed list/popleft in 4.01799988747 seconds: 24888.0046791 ops/sec

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 fromcollectionsimportdeque q=deque(range(5)) q.append(5) q.appendleft(6) printq printq.pop() printq.popleft() printq.rotate(3) printq printq.rotate(-1) printq   # deque([6, 0, 1, 2, 3, 4, 5]) # 5 # 6 # None # deque([2, 3, 4, 0, 1]) # None # deque([3, 4, 0, 1, 2])

1.3 Defaultdict

defaultdict对象在当你希望使用它存放追踪数据的时候很有用。举个例子，假定你希望追踪一个单词在字符串中的位置，那么你可以这么做：

 1 2 3 4 5 6 7 8 9 10 11 12 13 fromcollectionsimportdefaultdict   s="the quick brown fox jumps over the lazy dog"   words=s.split() location=defaultdict(list) form, ninenumerate(words):     location[n].append(m)   printlocation   # defaultdict(, {'brown': [2], 'lazy': [7], 'over': [5], 'fox': [3], # 'dog': [8], 'quick': [1], 'the': [0, 6], 'jumps': [4]})

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 fromcollectionsimportdefaultdict   s="the quick brown fox jumps over the lazy dog"   words=s.split() location=defaultdict(set) form, ninenumerate(words):     location[n].add(m)   printlocation   # defaultdict(, {'brown': set([2]), 'lazy': set([7]), # 'over': set([5]), 'fox': set([3]), 'dog': set([8]), 'quick': set([1]), # 'the': set([0, 6]), 'jumps': set([4])})

 1 2 3 4 5 6 7 8 9 s="the quick brown fox jumps over the lazy dog" d={} words=s.split()   forkey, valueinenumerate(words):     d.setdefault(key, []).append(value) printd   # {0: ['the'], 1: ['quick'], 2: ['brown'], 3: ['fox'], 4: ['jumps'], 5: ['over'], 6: ['the'], 7: ['lazy'], 8: ['dog']}

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 classExample(dict):     def__getitem__(self, item):         try:             returndict.__getitem__(self, item)         exceptKeyError:             value=self[item]=type(self)()             returnvalue   a=Example()   a[1][2][3]=4 a[1][3][3]=5 a[1][2]['test']=6   printa# {1: {2: {'test': 6, 3: 4}, 3: {3: 5}}}

2. Array

array模块定义了一个很像list的新对象类型，不同之处在于它限定了这个类型只能装一种类型的元素。array元素的类型是在创建并使用的时候确定的。

 1 2 3 4 importarray   a=array.array("i", [1,2,3,4,5]) b=array.array(a.typecode, (2*xforxina))

 1 2 3 4 5 importarray   a=array.array("i", [1,2,3,4,5]) fori, xinenumerate(a):     a[i]=2*x

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 importarray fromtimeitimportTimer   defarraytest():     a=array.array("i", [1,2,3,4,5])     b=array.array(a.typecode, (2*xforxina))   defenumeratetest():     a=array.array("i", [1,2,3,4,5])     fori, xinenumerate(a):         a[i]=2*x   if__name__=='__main__':     m=Timer("arraytest()","from __main__ import arraytest")     n=Timer("enumeratetest()","from __main__ import enumeratetest")       printm.timeit()# 5.22479210582     printn.timeit()# 4.34367196717

3. Heapq

heapq模块使用一个用堆实现的优先级队列。堆是一种简单的有序列表，并且置入了堆的相关规则。

 1 2 3 4 5 6 7 8 9 importheapq   heap=[]   forvaluein[20,10,30,50,40]:     heapq.heappush(heap, value)   whileheap:     printheapq.heappop(heap)

heapq模块有两个函数nlargest()和nsmallest()，顾名思义，让我们来看看它们的用法。

 1 2 3 4 5 importheapq   nums=[1,8,2,23,7,-4,18,23,42,37,2] print(heapq.nlargest(3, nums))# Prints [42, 37, 23] print(heapq.nsmallest(3, nums))# Prints [-4, 1, 2]

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 importheapq   portfolio=[ {'name':'IBM','shares':100,'price':91.1}, {'name':'AAPL','shares':50,'price':543.22}, {'name':'FB','shares':200,'price':21.09}, {'name':'HPQ','shares':35,'price':31.75}, {'name':'YHOO','shares':45,'price':16.35}, {'name':'ACME','shares':75,'price':115.65} ] cheap=heapq.nsmallest(3, portfolio, key=lambdas: s['price']) expensive=heapq.nlargest(3, portfolio, key=lambdas: s['price'])   printcheap   # [{'price': 16.35, 'name': 'YHOO', 'shares': 45}, # {'price': 21.09, 'name': 'FB', 'shares': 200}, {'price': 31.75, 'name': 'HPQ', 'shares': 35}]   printexpensive   # [{'price': 543.22, 'name': 'AAPL', 'shares': 50}, {'price': 115.65, 'name': 'ACME', # 'shares': 75}, {'price': 91.1, 'name': 'IBM', 'shares': 100}]

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 importheapq   classItem:     def__init__(self, name):         self.name=name       def__repr__(self):         return'Item({!r})'.format(self.name)   classPriorityQueue:     def__init__(self):         self._queue=[]         self._index=0       defpush(self, item, priority):         heapq.heappush(self._queue, (-priority,self._index, item))         self._index+=1       defpop(self):         returnheapq.heappop(self._queue)[-1]   q=PriorityQueue() q.push(Item('foo'),1) q.push(Item('bar'),5) q.push(Item('spam'),4) q.push(Item('grok'),1)   printq.pop()# Item('bar') printq.pop()# Item('spam') printq.pop()# Item('foo') printq.pop()# Item('grok')

4. Bisect

bisect模块能够提供保持list元素序列的支持。它使用了二分法完成大部分的工作。它在向一个list插入元素的同时维持list是有序的。在某些情况下，这比重复的对一个list进行排序更为高效，并且对于一个较大的list来说，对每步操作维持其有序也比对其排序要高效。

 1 a=[(0,100), (150,220), (500,1000)]

 1 2 3 4 5 6 7 importbisect   a=[(0,100), (150,220), (500,1000)]   bisect.insort_right(a, (250,400))   printa# [(0, 100), (150, 220), (250, 400), (500, 1000)]

 1 2 3 4 5 6 7 8 9 importbisect   a=[(0,100), (150,220), (500,1000)]   bisect.insort_right(a, (250,400)) bisect.insort_right(a, (399,450)) printa# [(0, 100), (150, 220), (250, 400), (500, 1000)]   printbisect.bisect(a, (550,1200))# 5

bisect(sequence, item) => index 返回元素应该的插入点，但序列并不被修改。

 1 2 3 4 5 6 7 8 9 10 11 importbisect   a=[(0,100), (150,220), (500,1000)]   bisect.insort_right(a, (250,400)) bisect.insort_right(a, (399,450)) printa# [(0, 100), (150, 220), (250, 400), (500, 1000)]   printbisect.bisect(a, (550,1200))# 5 bisect.insort_right(a, (550,1200)) printa# [(0, 100), (150, 220), (250, 400), (399, 450), (500, 1000), (550, 1200)]

5. Weakref

weakref模块能够帮助我们创建Python引用，却不会阻止对象的销毁操作。这一节包含了weak reference的基本用法，并且引入一个代理类。

 1 2 >>> a=[1,2,3] >>> b=a

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 classFoo(object):     def__init__(self):         self.obj=None         print'created'       def__del__(self):         print'destroyed'       defshow(self):         printself.obj       defstore(self, obj):         self.obj=obj   a=Foo()# created b=a dela delb# destroyed

Weak reference则是对对象的引用计数器不会产生影响。当一个对象存在weak reference时，并不会影响对象的撤销。这就说，如果一个对象仅剩下weak reference，那么它将会被销毁。

 1 2 3 4 5 >>>importweakref >>> a=Foo() created >>> b=weakref.ref(a) >>> b

 1 2 3 4 >>> a==b() True >>> b().show() None

 1 2 >>>dela destroyed

 1 2 >>> b()isNone True

 1 2 3 4 5 6 7 8 9 10 11 12 >>> a=Foo() created >>> b=weakref.proxy(a) >>> b.store('fish') >>> b.show() fish >>>dela destroyed >>> b.show() Traceback (most recent call last):   File"", line1,in? ReferenceError: weakly-referencedobjectno longer exists

完整的例子：

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 importweakref importgc   classMyObject(object):     defmy_method(self):         print'my_method was called!'   obj=MyObject() r=weakref.ref(obj)   gc.collect() assertr()isobj#r() allows you to access the object referenced: it's there.   obj=1#Let's change what obj references to gc.collect() assertr()isNone#There is no object left: it was gc'ed.

6. Copy()

shallow和deep copying的不同之处在于对于混合型对象的操作(混合对象是包含了其他类型对象的对象，例如list或其他类实例)。

• 对于shallow copy而言，它创建一个新的混合对象，并且将原对象中其他对象的引用插入新对象。
• 对于deep copy而言，它创建一个新的对象，并且递归地复制源对象中的其他对象并插入新的对象中。

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 importcopy   a=[1,2,3] b=[4,5]   c=[a,b]   # Normal Assignment d=c   printid(c)==id(d)         # True - d is the same object as c printid(c[0])==id(d[0])   # True - d[0] is the same object as c[0]   # Shallow Copy d=copy.copy(c)   printid(c)==id(d)         # False - d is now a new object printid(c[0])==id(d[0])   # True - d[0] is the same object as c[0]   # Deep Copy d=copy.deepcopy(c)   printid(c)==id(d)         # False - d is now a new object printid(c[0])==id(d[0])   # False - d[0] is now a new object

shallow copy (copy())操作创建一个新的容器，其包含的引用指向原对象中的对象。

deep copy (deepcopy())创建的对象包含的引用指向复制出来的新对象。

复杂的例子：

1) 复制一个graph实例，使用deepcopy，但其manager指向为原graph的manager。

2) 复制一个manager，完全创建新manager，但拷贝原有的所有graph。

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 importweakref, copy   classGraph(object):     def__init__(self, manager=None):         self.manager=NoneifmanagerisNoneelseweakref.ref(manager)     def__deepcopy__(self, memodict):         manager=self.manager()         returnGraph(memodict.get(id(manager), manager))   classManager(object):     def__init__(self, graphs=[]):         self.graphs=graphs         forginself.graphs:             g.manager=weakref.ref(self)   a=Manager([Graph(), Graph()]) b=copy.deepcopy(a)   if[g.manager()isbforginb.graphs]:     printTrue# True   ifcopy.deepcopy(a.graphs[0]).manager()isa:     printTrue# True

7. Pprint()

Pprint模块能够提供比较优雅的数据结构打印方式，如果你需要打印一个结构较为复杂，层次较深的字典或是JSON对象时，使用Pprint能够提供较好的打印结果。

 1 2 3 4 5 6 7 8 9 importpprint   matrix=[ [1,2,3], [4,5,6], [7,8,9] ] a=pprint.PrettyPrinter(width=20) a.pprint(matrix)   # [[1, 2, 3], #  [4, 5, 6], #  [7, 8, 9]]

1. 单链链表

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 classNode:     def__init__(self):         self.data=None         self.nextNode=None       defset_and_return_Next(self):         self.nextNode=Node()         returnself.nextNode       defgetNext(self):         returnself.nextNode       defgetData(self):         returnself.data       defsetData(self, d):         self.data=d   classLinkedList:     defbuildList(self, array):         self.head=Node()         self.head.setData(array[0])         self.temp=self.head         foriinarray[1:]:             self.temp=self.temp.set_and_return_Next()             self.temp.setData(i)             self.tail=self.temp         returnself.head     defprintList(self):         tempNode=self.head         while(tempNode!=self.tail):             print(tempNode.getData())             tempNode=tempNode.getNext()         print(self.tail.getData()) myArray=[3,5,4,6,2,6,7,8,9,10,21]   myList=LinkedList() myList.buildList(myArray) myList.printList()

2. 用Python实现的普林姆算法

 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 fromcollectionsimportdefaultdict fromheapqimportheapify, heappop, heappush   defprim( nodes, edges ):     conn=defaultdict(list)     forn1,n2,cinedges:         conn[ n1 ].append( (c, n1, n2) )         conn[ n2 ].append( (c, n2, n1) )       mst=[]     used=set( nodes[0] )     usable_edges=conn[ nodes[0] ][:]     heapify( usable_edges )       whileusable_edges:         cost, n1, n2=heappop( usable_edges )         ifn2notinused:             used.add( n2 )             mst.append( ( n1, n2, cost ) )               foreinconn[ n2 ]:                 ife[2]notinused:                     heappush( usable_edges, e )     returnmst   #test nodes=list("ABCDEFG") edges=[ ("A","B",7), ("A","D",5),           ("B","C",8), ("B","D",9), ("B","E",7),       ("C","E",5),       ("D","E",15), ("D","F",6),       ("E","F",8), ("E","G",9),       ("F","G",11)]   print"prim:", prim( nodes, edges )

×