## Python实现朴素Bayes算法（原创）

元禛慎独

import numpy as np
from sklearn.utils import check_arrays
from sklearn.cross_validation import train_test_split
from sklearn import datasets
import os

# Run the shell command `clear` so the results below start on an empty terminal.
os.system("clear")
##########################
class myBayes:
    """Naive Bayes classifier for discrete features with add-one smoothing.

    Every distinct value in a feature column is treated as a category.

    Attributes:
        Py: dict mapping class label -> smoothed prior
            ``(count(c) + 1) / (n_samples + n_classes)``.
        Px: dict mapping feature index -> dict mapping ``(value, label)`` ->
            smoothed conditional ``(count(value, c) + 1) / (count(c) + s)``
            where ``s`` is the number of distinct values in that column.
        Px_unseen: dict mapping feature index -> dict mapping label -> the
            smoothed conditional of a pair with zero training count,
            ``1 / (count(c) + s)``.
        nx: number of feature columns seen by ``fit``.
        lx: unused; kept for backward compatibility.
        result: the value returned by the most recent successful ``predict``.
    """

    def __init__(self):
        self.Px = {}
        self.Py = {}
        self.Px_unseen = {}
        self.nx = 0
        self.lx = 0
        self.result = None

    def fit(self, X, y):
        """Estimate priors and per-feature conditionals from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: array-like of n_samples class labels.

        Raises:
            ValueError: if ``X`` is not 2-D or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X)
        labels = np.asarray(y).ravel().tolist()
        if X.ndim != 2:
            raise ValueError("X must be 2-D: (n_samples, n_features)")
        n_row, n_col = X.shape
        if len(labels) != n_row:
            raise ValueError("X and y must contain the same number of samples")

        # Single pass over the labels instead of one list.count() per class.
        class_count = {}
        for label in labels:
            class_count[label] = class_count.get(label, 0) + 1
        k = len(class_count)

        # Sorted insertion keeps the class iteration order deterministic.
        Py = {}
        for label in sorted(class_count):
            Py[label] = (class_count[label] + 1.0) / (n_row + k)

        Px = {}
        Px_unseen = {}
        for i in range(n_col):
            column = X[:, i].tolist()
            s = len(set(column))
            pair_count = {}
            for pair in zip(column, labels):
                pair_count[pair] = pair_count.get(pair, 0) + 1
            # Use the integer class count directly rather than recovering it
            # from the float prior, which can be off by rounding error.
            Px[i] = {}
            for pair, n in pair_count.items():
                Px[i][pair] = (n + 1.0) / (class_count[pair[1]] + s)
            Px_unseen[i] = {}
            for label in class_count:
                Px_unseen[i][label] = 1.0 / (class_count[label] + s)

        self.Py = Py
        self.Px = Px
        self.Px_unseen = Px_unseen
        self.nx = n_col

    def _classify(self, row):
        """Return the label with the highest log-posterior for one sample."""
        best_label = None
        best_score = None
        for label, prior in self.Py.items():
            # Sum logs instead of multiplying probabilities to avoid
            # floating-point underflow when there are many features.
            score = np.log(prior)
            for j, value in enumerate(row):
                p = self.Px[j].get((value, label))
                if p is None:
                    # Pair never seen in training: use the smoothed
                    # zero-count estimate rather than 0, which would wipe
                    # out the whole posterior.
                    p = self.Px_unseen[j][label]
                score += np.log(p)
            # Strict '>' keeps the first label on ties (deterministic).
            if best_score is None or score > best_score:
                best_label, best_score = label, score
        if best_score is None:
            raise ValueError("predict() called before fit()")
        return best_label

    def predict(self, test_X):
        """Predict class labels for one sample or a batch of samples.

        Args:
            test_X: a 1-D sequence of ``nx`` feature values (one sample) or a
                2-D array-like of shape (n_samples, nx).

        Returns:
            A 0-d numpy array holding the label for 1-D input, a 1-D numpy
            array of labels for 2-D input, or ``None`` when the input is a
            scalar, has more than two dimensions, or its feature count does
            not match the fitted model.
        """
        tX = np.array(test_X)
        if tX.ndim == 1:
            if tX.shape[0] != self.nx:
                return None
            self.result = np.array(self._classify(tX.tolist()))
            return self.result
        if tX.ndim == 2:
            if tX.shape[1] != self.nx:
                return None
            self.result = np.array(
                [self._classify(row) for row in tX.tolist()]
            )
            return self.result
        return None

#x1=[1,1,1,1,1,2,2,2,2,2,3,3,3,3,3]
#x2=['s','m','m','s','s','s','m','m','l','l','l','m','m','l','l']
#y=[0,0,1,1,0,0,0,1,1,1,1,1,1,1,0]
#x2dict={'s':1,'m':2,'l':3}
#X=np.array(zip(x1,[x2dict[x] for x in x2]))
#y=np.array(y)
#print X,y
# Demo: train on part of the iris dataset and report accuracy on the rest.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing.
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.2)

clf = myBayes()
clf.fit(trainX, trainy)

predicted = clf.predict(testX)
# Single-argument print(...) produces the same output on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(testy)
print(predicted)

# Fraction of held-out samples whose predicted label matches the true one.
print(np.mean(testy == predicted))

###########################################

yuanzhen@yuanzhen-ThinkPad-X121e:~/P_script$ python mybayes.py

[2 0 2 0 2 0 2 1 2 0 0 0 1 2 2 1 2 0 2 1 2 2 2 1 1 2 1 1 0 2]
[2 0 2 0 2 0 2 1 2 0 0 2 2 2 2 1 2 0 2 1 2 1 2 2 1 2 1 1 0 2]
0.866666666667

[0 1 1 0 1 2 1 1 0 0 0 1 1 0 2 1 2 0 1 2 0 2 0 2 2 2 2 2 0 0]
[0 1 2 0 1 2 1 1 0 0 0 1 2 0 1 1 1 0 1 2 2 2 0 2 2 2 2 1 2 0]
0.766666666667

© 著作权归作者所有

### 元禛慎独

02/17
0
0
《数据挖掘导论》实验课——实验四、数据挖掘之KNN,Naive Bayes

06/21
0
0

02/17
0
0

02/17
0
0

02/17
0
0

64.监控平台介绍 安装zabbix 忘记admin密码

19.1 Linux监控平台介绍 19.2 zabbix监控介绍 19.3/19.4/19.6 安装zabbix 19.5 忘记Admin密码如何做 19.1 Linux监控平台介绍： 常见开源监控软件 ~1.cacti、nagios、zabbix、smokeping、ope...

oschina130111

10
0

7
0
DNS-over-HTTPS 的下一代是 DNS ON BLOCKCHAIN

6
0
CC攻击带来的危害我们该如何防御？

11
0

8
0