2021/03/07 03:01

# KNN算法——k近邻算法

import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from collections import Counter

if __name__ == "__main__":

    # All tumor samples: two features per sample.
    raw_data_x = [[3.393533211, 2.331273381],
                  [3.110073483, 1.781539638],
                  [1.343808831, 3.368360954],
                  [3.582294042, 4.679179110],
                  [2.280362439, 2.866990263],
                  [7.423436942, 4.696522875],
                  [5.745051997, 3.533989803],
                  [9.172168622, 2.511101045],
                  [7.792783481, 3.424088941],
                  [7.939820817, 0.791637231]]
    # Tumor labels: 0 = benign, 1 = malignant.
    raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    X_train = np.array(raw_data_x)
    Y_train = np.array(raw_data_y)
    plt.scatter(X_train[Y_train == 0, 0], X_train[Y_train == 0, 1], color='g')
    plt.scatter(X_train[Y_train == 1, 0], X_train[Y_train == 1, 1], color='r')
    # The new sample to classify.
    x = np.array([8.093607318, 3.365731514])
    plt.scatter(x[0], x[1], color='b')
    # Euclidean distance from the new sample to every training sample.
    distances = [sqrt(np.sum((x_train - x)**2)) for x_train in X_train]
    print(distances)
    # Indices of the training samples, sorted by ascending distance.
    nearest = np.argsort(distances)
    print(nearest)
    # Consult the 6 nearest neighbors.
    k = 6
    # Labels of those k nearest samples.
    topK_y = [Y_train[i] for i in nearest[:k]]
    print(topK_y)
    # Majority vote among the k neighbors decides the predicted label.
    # (Bug fix: this voting code was missing, yet the captured output below
    # shows the winning label "1" being printed.)
    votes = Counter(topK_y)
    print(votes.most_common(1)[0][0])
    plt.show()

[4.812566907609877, 5.229270827235305, 6.749798999160064, 4.6986266144110695, 5.83460014556857, 1.4900114024329525, 2.354574897431513, 1.3761132675144652, 0.3064319992975, 2.5786840957478887]
[8 7 5 6 9 3 0 1 4 2]
[1, 1, 1, 1, 1, 0]
1

import numpy as np
from math import sqrt
from collections import Counter

def kNN_classify(k, X_train, Y_train, x):
    """Classify one sample x by majority vote of its k nearest neighbors.

    Parameters:
        k: number of neighbors to consult (1 <= k <= len(X_train)).
        X_train: 2-D array of training samples.
        Y_train: 1-D array of training labels, aligned with X_train rows.
        x: 1-D array, the single sample to classify.

    Returns the most common label among the k training samples closest
    to x (Euclidean distance).
    """
    assert 1 <= k <= X_train.shape[0], "k无效"
    assert X_train.shape[0] == Y_train.shape[0], \
        "X数据集的大小必须跟Y数据集的大小相同"
    assert X_train.shape[1] == x.shape[0], \
        "x的维度必须与X数据集的数据维度相同"
    # Euclidean distance from x to every training sample.
    distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
    # Training-sample indices sorted by ascending distance.
    nearest = np.argsort(distances)
    # Labels of the k nearest samples.
    topK_y = [Y_train[i] for i in nearest[:k]]
    # Majority vote. (Bug fix: `votes` was used but never defined.)
    votes = Counter(topK_y)
    return votes.most_common(1)[0][0]
import numpy as np
import matplotlib.pyplot as plt
from playLA.KNN import kNN_classify

if __name__ == "__main__":

    # All tumor samples: two features per sample.
    # (Fix: the pasted snippet had lost all indentation — restored.)
    raw_data_x = [[3.393533211, 2.331273381],
                  [3.110073483, 1.781539638],
                  [1.343808831, 3.368360954],
                  [3.582294042, 4.679179110],
                  [2.280362439, 2.866990263],
                  [7.423436942, 4.696522875],
                  [5.745051997, 3.533989803],
                  [9.172168622, 2.511101045],
                  [7.792783481, 3.424088941],
                  [7.939820817, 0.791637231]]
    # Tumor labels: 0 = benign, 1 = malignant.
    raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    X_train = np.array(raw_data_x)
    Y_train = np.array(raw_data_y)
    plt.scatter(X_train[Y_train == 0, 0], X_train[Y_train == 0, 1], color='g')
    plt.scatter(X_train[Y_train == 1, 0], X_train[Y_train == 1, 1], color='r')
    # The new sample to classify.
    x = np.array([8.093607318, 3.365731514])
    plt.scatter(x[0], x[1], color='b')
    # Classify the new sample with the extracted kNN function (k = 6).
    res = kNN_classify(6, X_train, Y_train, x)
    print(res)
    plt.show()

1

from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # All tumor samples: two features per sample.
    # (Fix: the pasted snippet had lost all indentation — restored.)
    raw_data_x = [[3.393533211, 2.331273381],
                  [3.110073483, 1.781539638],
                  [1.343808831, 3.368360954],
                  [3.582294042, 4.679179110],
                  [2.280362439, 2.866990263],
                  [7.423436942, 4.696522875],
                  [5.745051997, 3.533989803],
                  [9.172168622, 2.511101045],
                  [7.792783481, 3.424088941],
                  [7.939820817, 0.791637231]]
    # Tumor labels: 0 = benign, 1 = malignant.
    raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    X_train = np.array(raw_data_x)
    Y_train = np.array(raw_data_y)
    plt.scatter(X_train[Y_train == 0, 0], X_train[Y_train == 0, 1], color='g')
    plt.scatter(X_train[Y_train == 1, 0], X_train[Y_train == 1, 1], color='r')
    # The new sample to classify.
    x = np.array([8.093607318, 3.365731514])
    plt.scatter(x[0], x[1], color='b')
    kNN_classifier = KNeighborsClassifier(n_neighbors=6)
    # "Fit" simply stores the training data for a lazy learner like kNN.
    kNN_classifier.fit(X_train, Y_train)
    # sklearn predicts on 2-D input: reshape the single sample to (1, n_features).
    X_predict = x.reshape(1, -1)
    # Predicted labels, one per row of X_predict.
    predict = kNN_classifier.predict(X_predict)
    # Print the prediction for sample 0.
    print(predict[0])
    plt.show()

1

import numpy as np
from math import sqrt
from collections import Counter

class KNNClassifier:
    """A minimal k-nearest-neighbors classifier using Euclidean distance."""

    def __init__(self, k):
        # Initialize the classifier with the number of neighbors to consult.
        assert k >= 1, "k必须有效"
        self.k = k
        # Training data is stored by fit(); None until then.
        self._X_train = None
        self._Y_train = None

    def fit(self, X_train, Y_train):
        # kNN is a lazy learner: "training" just stores the data set.
        assert X_train.shape[0] == Y_train.shape[0], \
            "X数据集的大小必须跟Y数据集的大小相同"
        assert self.k <= X_train.shape[0], \
            "X数据集的大小最小为k"
        self._X_train = X_train
        self._Y_train = Y_train
        return self

    def predict(self, X_predict):
        # Predict a label for every row of X_predict; returns a 1-D array.
        assert self._X_train is not None and self._Y_train is not None, \
            "预测前必须提交数据集"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "预测数据的维度必须与X数据集的数据维度相同"
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        # Predict the label of a single sample x.
        assert x.shape[0] == self._X_train.shape[1], \
            "预测数据的维度必须与X数据集的数据维度相同"
        # Euclidean distance from x to every training sample.
        distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in self._X_train]
        # Training-sample indices sorted by ascending distance.
        nearest = np.argsort(distances)
        # Labels of the k nearest samples.
        topK_y = [self._Y_train[i] for i in nearest[:self.k]]
        # Majority vote. (Bug fix: `votes` was used but never defined.)
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]
from playLA.KNNClassifier import KNNClassifier
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # All tumor samples: two features per sample.
    # (Fix: the pasted snippet had lost all indentation — restored.)
    raw_data_x = [[3.393533211, 2.331273381],
                  [3.110073483, 1.781539638],
                  [1.343808831, 3.368360954],
                  [3.582294042, 4.679179110],
                  [2.280362439, 2.866990263],
                  [7.423436942, 4.696522875],
                  [5.745051997, 3.533989803],
                  [9.172168622, 2.511101045],
                  [7.792783481, 3.424088941],
                  [7.939820817, 0.791637231]]
    # Tumor labels: 0 = benign, 1 = malignant.
    raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    X_train = np.array(raw_data_x)
    Y_train = np.array(raw_data_y)
    plt.scatter(X_train[Y_train == 0, 0], X_train[Y_train == 0, 1], color='g')
    plt.scatter(X_train[Y_train == 1, 0], X_train[Y_train == 1, 1], color='r')
    # The new sample to classify.
    x = np.array([8.093607318, 3.365731514])
    plt.scatter(x[0], x[1], color='b')
    kNN_classifier = KNNClassifier(k=6)
    # Store the training set in our hand-rolled classifier.
    kNN_classifier.fit(X_train, Y_train)
    # predict() expects a 2-D array: reshape the single sample to (1, n_features).
    X_predict = x.reshape(1, -1)
    # Predicted labels, one per row of X_predict.
    predict = kNN_classifier.predict(X_predict)
    # Print the prediction for sample 0.
    print(predict[0])
    plt.show()


1

import numpy as np
from math import sqrt
from collections import Counter

class KNNClassifier:
    """A minimal k-nearest-neighbors classifier with an accuracy score method."""

    def __init__(self, k):
        # Initialize the classifier with the number of neighbors to consult.
        assert k >= 1, "k必须有效"
        self.k = k
        # Training data is stored by fit(); None until then.
        self._X_train = None
        self._Y_train = None

    def fit(self, X_train, Y_train):
        # kNN is a lazy learner: "training" just stores the data set.
        assert X_train.shape[0] == Y_train.shape[0], \
            "X数据集的大小必须跟Y数据集的大小相同"
        assert self.k <= X_train.shape[0], \
            "X数据集的大小最小为k"
        self._X_train = X_train
        self._Y_train = Y_train
        return self

    def predict(self, X_predict):
        # Predict a label for every row of X_predict; returns a 1-D array.
        assert self._X_train is not None and self._Y_train is not None, \
            "预测前必须提交数据集"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "预测数据的维度必须与X数据集的数据维度相同"
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        # Predict the label of a single sample x.
        assert x.shape[0] == self._X_train.shape[1], \
            "预测数据的维度必须与X数据集的数据维度相同"
        # Euclidean distance from x to every training sample.
        distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in self._X_train]
        # Training-sample indices sorted by ascending distance.
        nearest = np.argsort(distances)
        # Labels of the k nearest samples.
        topK_y = [self._Y_train[i] for i in nearest[:self.k]]
        # Majority vote. (Bug fix: the original computed topK_y but never
        # voted or returned anything, so _predict always returned None.)
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        # Accuracy of the current model on the test set X_test / y_test.
        y_predict = self.predict(X_test)
        return self._accuracy_score(y_test, y_predict)

    def _accuracy_score(self, y_true, y_predict):
        # Fraction of predictions that exactly match the true labels.
        assert y_true.shape[0] == y_predict.shape[0], \
            "真实值的维度必须与预测值的维度相等"
        return sum(y_true == y_predict) / len(y_true)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.KNNClassifier import KNNClassifier

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined; the (1797, 64) shape in
    # the captured output matches sklearn's load_digits.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    # Inspect one sample: its label and its 8x8 image.
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    plt.show()
    # Hold out 20% of the data for testing, then score our own classifier.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_ratio=0.2)
    my_knn_crf = KNNClassifier(k=3)
    my_knn_crf.fit(X_train, y_train)
    print(my_knn_crf.score(X_test, y_test))


(1797, 64)
0
0.9888579387186629

scikit-learn中的分类准确度

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    # Inspect one sample: its label and its 8x8 image.
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    plt.show()
    # Hold out 20% for testing and score sklearn's kNN classifier.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))

(1797, 64)
0
0.9888888888888889

kNN算法中的k是典型的超参数

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    # plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    # plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))
    # Search for the best hyperparameter k in [1, 10].
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print("best_score =", best_score)

(1797, 64)
0
0.9888888888888889
best_k = 4
best_score = 0.9916666666666667

kNN算法并不只有这一个超参数。

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    # plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    # plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))
    # Search for the best hyperparameter k in [1, 10].
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Second search: also consider distance-weighted voting.
    best_method = ""
    best_score = 0
    best_k = -1
    for method in ["uniform", "distance"]:
        for k in range(1, 11):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_method = method
    print("best_method =", best_method)
    print("best_k =", best_k)
    print("best_score =", best_score)

(1797, 64)
0
0.9888888888888889
best_k = 4
best_score = 0.9916666666666667
best_method = uniform
best_k = 4
best_score = 0.9916666666666667

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import timeit

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    # plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    # plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))
    # Search for the best hyperparameter k in [1, 10].
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Second search: also consider distance-weighted voting.
    best_method = ""
    best_score = 0
    best_k = -1
    for method in ["uniform", "distance"]:
        for k in range(1, 11):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_method = method
    print("best_method =", best_method)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Third search: the Minkowski-distance exponent p (timed with timeit).
    start_time = timeit.default_timer()
    best_p = -1
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        for p in range(1, 6):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights='distance', p=p)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_p = p
    print(timeit.default_timer() - start_time)
    print("best_p =", best_p)
    print("best_k =", best_k)
    print("best_score =", best_score)

(1797, 64)
0
0.9888888888888889
best_k = 4
best_score = 0.9916666666666667
best_method = uniform
best_k = 4
best_score = 0.9916666666666667
10.488031614
best_p = 2
best_k = 3
best_score = 0.9888888888888889

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import timeit
from sklearn.model_selection import GridSearchCV

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    # plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    # plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))
    # Search for the best hyperparameter k in [1, 10].
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Second search: also consider distance-weighted voting.
    best_method = ""
    best_score = 0
    best_k = -1
    for method in ["uniform", "distance"]:
        for k in range(1, 11):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_method = method
    print("best_method =", best_method)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Third search: the Minkowski-distance exponent p (timed with timeit).
    start_time = timeit.default_timer()
    best_p = -1
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        for p in range(1, 6):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights='distance', p=p)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_p = p
    print(timeit.default_timer() - start_time)
    print("best_p =", best_p)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Grid search over the same hyperparameter space with cross-validation.
    param_grid = [
        {
            'weights': ['uniform'],
            'n_neighbors': [i for i in range(1, 11)]
        },
        {
            'weights': ['distance'],
            'n_neighbors': [i for i in range(1, 11)],
            'p': [i for i in range(1, 6)]
        }
    ]
    knn_clf = KNeighborsClassifier()
    grid_search = GridSearchCV(knn_clf, param_grid)
    start_time = timeit.default_timer()
    grid_search.fit(X_train, y_train)
    print(timeit.default_timer() - start_time)
    print(grid_search.best_estimator_)
    print(grid_search.best_score_)
    print(grid_search.best_params_)

(1797, 64)
0
0.9888888888888889
best_k = 4
best_score = 0.9916666666666667
best_method = uniform
best_k = 4
best_score = 0.9916666666666667
10.596328198999998
best_p = 2
best_k = 3
best_score = 0.9888888888888889
36.002135212
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=1, p=2,
weights='uniform')
0.9860820751064653
{'n_neighbors': 1, 'weights': 'uniform'}

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import timeit
from sklearn.model_selection import GridSearchCV

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    # plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    # plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))
    # Search for the best hyperparameter k in [1, 10].
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Second search: also consider distance-weighted voting.
    best_method = ""
    best_score = 0
    best_k = -1
    for method in ["uniform", "distance"]:
        for k in range(1, 11):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_method = method
    print("best_method =", best_method)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Third search: the Minkowski-distance exponent p (timed with timeit).
    start_time = timeit.default_timer()
    best_p = -1
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        for p in range(1, 6):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights='distance', p=p)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_p = p
    print(timeit.default_timer() - start_time)
    print("best_p =", best_p)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Grid search over the same hyperparameter space with cross-validation.
    param_grid = [
        {
            'weights': ['uniform'],
            'n_neighbors': [i for i in range(1, 11)]
        },
        {
            'weights': ['distance'],
            'n_neighbors': [i for i in range(1, 11)],
            'p': [i for i in range(1, 6)]
        }
    ]
    knn_clf = KNeighborsClassifier()
    grid_search = GridSearchCV(knn_clf, param_grid)
    start_time = timeit.default_timer()
    grid_search.fit(X_train, y_train)
    print(timeit.default_timer() - start_time)
    print(grid_search.best_estimator_)
    print(grid_search.best_score_)
    print(grid_search.best_params_)
    # Evaluate the best estimator found by the grid search on the test set.
    knn_clf = grid_search.best_estimator_
    print(knn_clf.predict(X_test))
    print(knn_clf.score(X_test, y_test))

(1797, 64)
0
0.9888888888888889
best_k = 4
best_score = 0.9916666666666667
best_method = uniform
best_k = 4
best_score = 0.9916666666666667
10.529266493
best_p = 2
best_k = 3
best_score = 0.9888888888888889
36.449392196999995
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=1, p=2,
weights='uniform')
0.9860820751064653
{'n_neighbors': 1, 'weights': 'uniform'}
[8 1 3 4 4 0 7 0 8 0 4 6 1 1 2 0 1 6 7 3 3 6 3 2 9 4 0 2 0 3 0 8 7 2 3 5 1
3 1 5 8 6 2 6 3 1 3 0 0 4 9 9 2 8 7 0 5 4 0 9 5 5 9 3 4 2 8 8 7 1 4 3 0 2
7 2 1 2 4 0 9 0 6 6 2 0 0 5 4 4 3 1 3 8 6 4 4 7 5 6 8 4 8 4 6 9 7 7 0 8 8
3 9 7 1 8 4 2 7 0 0 4 9 6 7 3 4 6 4 8 4 7 2 6 5 5 8 7 2 5 5 9 7 9 3 1 9 4
4 1 5 1 6 4 4 8 1 6 2 5 2 1 4 4 3 9 4 0 6 0 8 3 8 7 3 0 3 0 5 9 2 7 1 8 1
4 3 3 7 8 2 7 2 2 8 0 5 7 6 7 3 4 7 1 7 0 9 2 8 9 3 8 9 1 1 1 9 8 8 0 3 7
3 3 4 8 2 1 8 6 0 1 7 7 5 8 3 8 7 6 8 4 2 6 2 3 7 4 9 3 5 0 6 3 8 3 3 1 4
5 3 2 5 6 8 6 9 5 5 3 6 5 9 3 7 7 0 2 4 9 9 9 2 5 6 1 9 6 9 7 7 4 5 0 0 5
3 8 4 4 3 2 5 3 2 2 3 0 9 8 2 1 4 0 6 2 8 0 6 4 9 9 8 3 9 8 6 3 2 7 9 4 2
7 5 1 1 6 1 0 4 9 2 9 0 3 3 0 7 4 8 5 9 5 9 5 0 7 9 8]
0.9833333333333333

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import timeit
from sklearn.model_selection import GridSearchCV

if __name__ == "__main__":

    # Load the handwritten-digits data set.
    # (Bug fix: `digits` was used but never defined.)
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target
    print(X.shape)
    some_digit = X[666]
    print(y[666])
    some_digit_image = some_digit.reshape(8, 8)
    # plt.imshow(some_digit_image, cmap=matplotlib.cm.binary)
    # plt.show()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    knn_crf = KNeighborsClassifier(n_neighbors=3)
    knn_crf.fit(X_train, y_train)
    print(knn_crf.score(X_test, y_test))
    # Search for the best hyperparameter k in [1, 10].
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Second search: also consider distance-weighted voting.
    best_method = ""
    best_score = 0
    best_k = -1
    for method in ["uniform", "distance"]:
        for k in range(1, 11):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_method = method
    print("best_method =", best_method)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Third search: the Minkowski-distance exponent p (timed with timeit).
    start_time = timeit.default_timer()
    best_p = -1
    best_score = 0
    best_k = -1
    for k in range(1, 11):
        for p in range(1, 6):
            knn_clf = KNeighborsClassifier(n_neighbors=k, weights='distance', p=p)
            knn_clf.fit(X_train, y_train)
            score = knn_clf.score(X_test, y_test)
            if score > best_score:
                best_k = k
                best_score = score
                best_p = p
    print(timeit.default_timer() - start_time)
    print("best_p =", best_p)
    print("best_k =", best_k)
    print("best_score =", best_score)
    # Grid search over the same hyperparameter space with cross-validation.
    param_grid = [
        {
            'weights': ['uniform'],
            'n_neighbors': [i for i in range(1, 11)]
        },
        {
            'weights': ['distance'],
            'n_neighbors': [i for i in range(1, 11)],
            'p': [i for i in range(1, 6)]
        }
    ]
    knn_clf = KNeighborsClassifier()
    grid_search = GridSearchCV(knn_clf, param_grid)
    start_time = timeit.default_timer()
    grid_search.fit(X_train, y_train)
    print(timeit.default_timer() - start_time)
    print(grid_search.best_estimator_)
    print(grid_search.best_score_)
    print(grid_search.best_params_)
    # Evaluate the best estimator found by the grid search on the test set.
    knn_clf = grid_search.best_estimator_
    print(knn_clf.predict(X_test))
    print(knn_clf.score(X_test, y_test))
    # Parallel grid search: all CPU cores (n_jobs=-1) with verbose progress.
    grid_search = GridSearchCV(knn_clf, param_grid, n_jobs=-1, verbose=2)
    start_time = timeit.default_timer()
    grid_search.fit(X_train, y_train)
    print(timeit.default_timer() - start_time)

(1797, 64)
0
0.9888888888888889
best_k = 4
best_score = 0.9916666666666667
best_method = uniform
best_k = 4
best_score = 0.9916666666666667
10.68357993
best_p = 2
best_k = 3
best_score = 0.9888888888888889
35.479140363
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=1, p=2,
weights='uniform')
0.9860820751064653
{'n_neighbors': 1, 'weights': 'uniform'}
[8 1 3 4 4 0 7 0 8 0 4 6 1 1 2 0 1 6 7 3 3 6 3 2 9 4 0 2 0 3 0 8 7 2 3 5 1
3 1 5 8 6 2 6 3 1 3 0 0 4 9 9 2 8 7 0 5 4 0 9 5 5 9 3 4 2 8 8 7 1 4 3 0 2
7 2 1 2 4 0 9 0 6 6 2 0 0 5 4 4 3 1 3 8 6 4 4 7 5 6 8 4 8 4 6 9 7 7 0 8 8
3 9 7 1 8 4 2 7 0 0 4 9 6 7 3 4 6 4 8 4 7 2 6 5 5 8 7 2 5 5 9 7 9 3 1 9 4
4 1 5 1 6 4 4 8 1 6 2 5 2 1 4 4 3 9 4 0 6 0 8 3 8 7 3 0 3 0 5 9 2 7 1 8 1
4 3 3 7 8 2 7 2 2 8 0 5 7 6 7 3 4 7 1 7 0 9 2 8 9 3 8 9 1 1 1 9 8 8 0 3 7
3 3 4 8 2 1 8 6 0 1 7 7 5 8 3 8 7 6 8 4 2 6 2 3 7 4 9 3 5 0 6 3 8 3 3 1 4
5 3 2 5 6 8 6 9 5 5 3 6 5 9 3 7 7 0 2 4 9 9 9 2 5 6 1 9 6 9 7 7 4 5 0 0 5
3 8 4 4 3 2 5 3 2 2 3 0 9 8 2 1 4 0 6 2 8 0 6 4 9 9 8 3 9 8 6 3 2 7 9 4 2
7 5 1 1 6 1 0 4 9 2 9 0 3 3 0 7 4 8 5 9 5 9 5 0 7 9 8]
0.9833333333333333
Fitting 5 folds for each of 60 candidates, totalling 300 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[CV] n_neighbors=1, weights=uniform ..................................
[CV] n_neighbors=1, weights=uniform ..................................
[CV] n_neighbors=1, weights=uniform ..................................
[CV] n_neighbors=1, weights=uniform ..................................
[CV] n_neighbors=1, weights=uniform ..................................
[CV] n_neighbors=2, weights=uniform ..................................
[CV] n_neighbors=2, weights=uniform ..................................
[CV] n_neighbors=2, weights=uniform ..................................
[CV] ................... n_neighbors=1, weights=uniform, total=   0.1s
[CV] n_neighbors=2, weights=uniform ..................................
[CV] n_neighbors=2, weights=uniform ..................................
[CV] ................... n_neighbors=1, weights=uniform, total=   0.1s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ................... n_neighbors=1, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=1, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=1, weights=uniform, total=   0.1s
[CV] n_neighbors=3, weights=uniform ..................................
[CV] n_neighbors=3, weights=uniform ..................................
[CV] n_neighbors=4, weights=uniform ..................................
[CV] n_neighbors=3, weights=uniform ..................................
[CV] n_neighbors=3, weights=uniform ..................................
[CV] ................... n_neighbors=2, weights=uniform, total=   0.1s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] n_neighbors=3, weights=uniform ..................................
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ................... n_neighbors=2, weights=uniform, total=   0.1s
[CV] n_neighbors=4, weights=uniform ..................................
[CV] ................... n_neighbors=2, weights=uniform, total=   0.1s
[CV] n_neighbors=5, weights=uniform ..................................
[CV] ................... n_neighbors=2, weights=uniform, total=   0.1s
[CV] n_neighbors=5, weights=uniform ..................................
[CV] ................... n_neighbors=2, weights=uniform, total=   0.1s
[CV] n_neighbors=5, weights=uniform ..................................
[CV] n_neighbors=5, weights=uniform ..................................
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.9s
[CV] ................... n_neighbors=4, weights=uniform, total=   0.1s
[CV] n_neighbors=5, weights=uniform ..................................
[CV] ................... n_neighbors=3, weights=uniform, total=   0.1s
[CV] n_neighbors=6, weights=uniform ..................................
[CV] ................... n_neighbors=3, weights=uniform, total=   0.1s
[CV] n_neighbors=6, weights=uniform ..................................
[CV] ................... n_neighbors=4, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=3, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=3, weights=uniform, total=   0.1s
[CV] n_neighbors=6, weights=uniform ..................................
[CV] ................... n_neighbors=4, weights=uniform, total=   0.1s
[CV] n_neighbors=6, weights=uniform ..................................
[CV] ................... n_neighbors=3, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=4, weights=uniform, total=   0.1s
[CV] n_neighbors=6, weights=uniform ..................................
[CV] ................... n_neighbors=4, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=5, weights=uniform, total=   0.1s
[CV] n_neighbors=7, weights=uniform ..................................
[CV] n_neighbors=7, weights=uniform ..................................
[CV] n_neighbors=7, weights=uniform ..................................
[CV] n_neighbors=7, weights=uniform ..................................
[CV] n_neighbors=7, weights=uniform ..................................
[CV] ................... n_neighbors=5, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=5, weights=uniform, total=   0.1s
[CV] n_neighbors=8, weights=uniform ..................................
[CV] ................... n_neighbors=5, weights=uniform, total=   0.1s
[CV] n_neighbors=8, weights=uniform ..................................
[CV] n_neighbors=8, weights=uniform ..................................
[CV] n_neighbors=8, weights=uniform ..................................
[CV] ................... n_neighbors=5, weights=uniform, total=   0.1s
[CV] n_neighbors=8, weights=uniform ..................................
[CV] ................... n_neighbors=6, weights=uniform, total=   0.1s
[CV] n_neighbors=9, weights=uniform ..................................
[CV] n_neighbors=9, weights=uniform ..................................
[CV] ................... n_neighbors=6, weights=uniform, total=   0.1s
[CV] n_neighbors=9, weights=uniform ..................................
[CV] ................... n_neighbors=6, weights=uniform, total=   0.1s
[CV] n_neighbors=9, weights=uniform ..................................
[CV] ................... n_neighbors=6, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=6, weights=uniform, total=   0.1s
[CV] n_neighbors=9, weights=uniform ..................................
[CV] n_neighbors=10, weights=uniform .................................
[CV] ................... n_neighbors=7, weights=uniform, total=   0.1s
[CV] n_neighbors=10, weights=uniform .................................
[CV] ................... n_neighbors=7, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=7, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=7, weights=uniform, total=   0.1s
[CV] n_neighbors=10, weights=uniform .................................
[CV] ................... n_neighbors=7, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=8, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=8, weights=uniform, total=   0.1s
[CV] n_neighbors=10, weights=uniform .................................
[CV] n_neighbors=10, weights=uniform .................................
[CV] ................... n_neighbors=8, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=1, weights=distance ............................
[CV] ................... n_neighbors=8, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=1, weights=distance ............................
[CV] n_neighbors=1, p=1, weights=distance ............................
[CV] ................... n_neighbors=8, weights=uniform, total=   0.1s
[CV] ................... n_neighbors=9, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=1, weights=distance ............................
[CV] ................... n_neighbors=9, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=1, weights=distance ............................
[CV] ................... n_neighbors=9, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=2, weights=distance ............................
[CV] n_neighbors=1, p=2, weights=distance ............................
[CV] ................... n_neighbors=9, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=2, weights=distance ............................
[CV] ................... n_neighbors=9, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=2, weights=distance ............................
[CV] .................. n_neighbors=10, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=2, weights=distance ............................
[CV] n_neighbors=1, p=3, weights=distance ............................
[CV] .................. n_neighbors=10, weights=uniform, total=   0.1s
[CV] n_neighbors=1, p=3, weights=distance ............................
[CV] .................. n_neighbors=10, weights=uniform, total=   0.0s
[CV] n_neighbors=1, p=3, weights=distance ............................
[CV] ............. n_neighbors=1, p=1, weights=distance, total=   0.0s
[CV] ............. n_neighbors=1, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=1, p=3, weights=distance ............................
[CV] ............. n_neighbors=1, p=1, weights=distance, total=   0.0s
[CV] .................. n_neighbors=10, weights=uniform, total=   0.1s
[CV] ............. n_neighbors=1, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=1, p=3, weights=distance ............................
[CV] .................. n_neighbors=10, weights=uniform, total=   0.1s
[CV] ............. n_neighbors=1, p=1, weights=distance, total=   0.0s
[CV] ............. n_neighbors=1, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=1, p=4, weights=distance ............................
[CV] ............. n_neighbors=1, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=1, p=4, weights=distance ............................
[CV] ............. n_neighbors=1, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=1, p=4, weights=distance ............................
[CV] n_neighbors=1, p=4, weights=distance ............................
[CV] n_neighbors=1, p=4, weights=distance ............................
[CV] ............. n_neighbors=1, p=2, weights=distance, total=   0.0s
[CV] ............. n_neighbors=1, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=1, p=5, weights=distance ............................
[CV] n_neighbors=1, p=5, weights=distance ............................
[CV] n_neighbors=1, p=5, weights=distance ............................
[CV] n_neighbors=1, p=5, weights=distance ............................
[CV] n_neighbors=1, p=5, weights=distance ............................
[CV] n_neighbors=2, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=2, weights=distance ............................
[CV] ............. n_neighbors=1, p=5, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=2, weights=distance ............................
[CV] ............. n_neighbors=1, p=4, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=5, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=5, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=5, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=2, weights=distance ............................
[CV] n_neighbors=2, p=3, weights=distance ............................
[CV] ............. n_neighbors=1, p=5, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=3, weights=distance ............................
[CV] ............. n_neighbors=1, p=3, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=4, weights=distance ............................
[CV] ............. n_neighbors=1, p=3, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=3, weights=distance, total=   0.3s
[CV] ............. n_neighbors=1, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=4, weights=distance ............................
[CV] ............. n_neighbors=2, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=2, weights=distance ............................
[CV] n_neighbors=2, p=4, weights=distance ............................
[CV] ............. n_neighbors=1, p=3, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=5, weights=distance ............................
[CV] ............. n_neighbors=1, p=3, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=5, weights=distance ............................
[CV] n_neighbors=3, p=1, weights=distance ............................
[CV] n_neighbors=3, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=2, weights=distance ............................
[CV] n_neighbors=3, p=1, weights=distance ............................
[CV] n_neighbors=3, p=2, weights=distance ............................
[CV] n_neighbors=3, p=2, weights=distance ............................
[CV] ............. n_neighbors=2, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=2, p=3, weights=distance ............................
[CV] n_neighbors=3, p=3, weights=distance ............................
[CV] ............. n_neighbors=2, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=3, weights=distance ............................
[CV] ............. n_neighbors=3, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=1, weights=distance ............................
[CV] ............. n_neighbors=3, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=1, weights=distance ............................
[CV] ............. n_neighbors=2, p=2, weights=distance, total=   0.0s
[CV] ............. n_neighbors=3, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=2, weights=distance ............................
[CV] n_neighbors=3, p=3, weights=distance ............................
[CV] ............. n_neighbors=3, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=2, weights=distance ............................
[CV] ............. n_neighbors=3, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=2, weights=distance ............................
[CV] ............. n_neighbors=3, p=1, weights=distance, total=   0.0s
[CV] ............. n_neighbors=3, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=4, weights=distance ............................
[CV] ............. n_neighbors=3, p=2, weights=distance, total=   0.0s
[CV] ............. n_neighbors=3, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=4, weights=distance ............................
[CV] n_neighbors=3, p=5, weights=distance ............................
[CV] ............. n_neighbors=3, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=3, p=5, weights=distance ............................
[CV] n_neighbors=3, p=5, weights=distance ............................
[CV] ............. n_neighbors=2, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=4, weights=distance ............................
[CV] ............. n_neighbors=2, p=5, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=5, weights=distance ............................
[CV] ............. n_neighbors=2, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=4, weights=distance ............................
[CV] ............. n_neighbors=2, p=5, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=5, weights=distance ............................
[CV] ............. n_neighbors=2, p=4, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=5, weights=distance ............................
[CV] ............. n_neighbors=2, p=3, weights=distance, total=   0.3s
[CV] n_neighbors=2, p=3, weights=distance ............................
[CV] ............. n_neighbors=2, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=2, p=3, weights=distance ............................
[CV] ............. n_neighbors=2, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=4, p=1, weights=distance ............................
[CV] ............. n_neighbors=3, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=3, weights=distance ............................
[CV] ............. n_neighbors=3, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=5, weights=distance ............................
[CV] ............. n_neighbors=3, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=3, weights=distance ............................
[CV] ............. n_neighbors=3, p=3, weights=distance, total=   0.4s
[CV] ............. n_neighbors=3, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=4, weights=distance ............................
[CV] n_neighbors=4, p=1, weights=distance ............................
[CV] ............. n_neighbors=4, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=1, weights=distance ............................
[CV] ............. n_neighbors=3, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=5, weights=distance ............................
[CV] ............. n_neighbors=3, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=4, weights=distance ............................
[CV] ............. n_neighbors=3, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=3, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=1, weights=distance, total=   0.1s
[CV] ............. n_neighbors=4, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=1, weights=distance ............................
[CV] n_neighbors=4, p=2, weights=distance ............................
[CV] ............. n_neighbors=4, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=1, weights=distance ............................
[CV] ............. n_neighbors=4, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=2, weights=distance ............................
[CV] ............. n_neighbors=4, p=1, weights=distance, total=   0.1s
[CV] ............. n_neighbors=4, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=2, weights=distance ............................
[CV] n_neighbors=4, p=2, weights=distance ............................
[CV] ............. n_neighbors=2, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=4, p=3, weights=distance ............................
[CV] ............. n_neighbors=2, p=5, weights=distance, total=   0.4s
[CV] ............. n_neighbors=2, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=4, p=3, weights=distance ............................
[CV] n_neighbors=4, p=4, weights=distance ............................
[CV] ............. n_neighbors=2, p=4, weights=distance, total=   0.5s
[CV] ............. n_neighbors=2, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=4, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=3, weights=distance ............................
[CV] n_neighbors=4, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=4, p=2, weights=distance ............................
[CV] ............. n_neighbors=2, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=4, p=5, weights=distance ............................
[CV] ............. n_neighbors=4, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=4, p=5, weights=distance ............................
[CV] ............. n_neighbors=2, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=1, weights=distance ............................
[CV] ............. n_neighbors=3, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=5, p=1, weights=distance ............................
[CV] ............. n_neighbors=3, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=5, p=1, weights=distance ............................
[CV] ............. n_neighbors=3, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=2, weights=distance ............................
[CV] ............. n_neighbors=3, p=5, weights=distance, total=   0.4s
[CV] ............. n_neighbors=3, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=5, p=2, weights=distance ............................
[CV] ............. n_neighbors=3, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=3, weights=distance ............................
[CV] ............. n_neighbors=3, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=5, p=3, weights=distance ............................
[CV] ............. n_neighbors=5, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=3, weights=distance ............................
[CV] n_neighbors=5, p=4, weights=distance ............................
[CV] ............. n_neighbors=5, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=2, weights=distance ............................
[CV] ............. n_neighbors=5, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=2, weights=distance ............................
[CV] ............. n_neighbors=5, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=2, weights=distance ............................
[CV] ............. n_neighbors=5, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=4, weights=distance ............................
[CV] ............. n_neighbors=5, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=5, weights=distance ............................
[CV] ............. n_neighbors=5, p=2, weights=distance, total=   0.1s
[CV] ............. n_neighbors=5, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=5, p=5, weights=distance ............................
[CV] n_neighbors=5, p=5, weights=distance ............................
[CV] ............. n_neighbors=4, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=4, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=4, p=3, weights=distance ............................
[CV] ............. n_neighbors=4, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=4, p=5, weights=distance ............................
[CV] ............. n_neighbors=4, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=4, p=3, weights=distance ............................
[CV] ............. n_neighbors=4, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=4, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=1, weights=distance ............................
[CV] ............. n_neighbors=4, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=4, p=5, weights=distance ............................
[CV] ............. n_neighbors=4, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=4, p=5, weights=distance ............................
[CV] ............. n_neighbors=6, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=6, p=1, weights=distance ............................
[CV] ............. n_neighbors=6, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=4, weights=distance ............................
[CV] ............. n_neighbors=5, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=3, weights=distance ............................
[CV] ............. n_neighbors=6, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=4, weights=distance ............................
[CV] ............. n_neighbors=5, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=3, weights=distance ............................
[CV] ............. n_neighbors=5, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=4, weights=distance ............................
[CV] ............. n_neighbors=6, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=2, weights=distance ............................
[CV] ............. n_neighbors=5, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=5, weights=distance ............................
[CV] ............. n_neighbors=5, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=5, p=5, weights=distance ............................
[CV] ............. n_neighbors=6, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=2, weights=distance ............................
[CV] ............. n_neighbors=6, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=2, weights=distance ............................
[CV] ............. n_neighbors=6, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=2, weights=distance ............................
[CV] ............. n_neighbors=4, p=5, weights=distance, total=   0.4s
[CV] ............. n_neighbors=6, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=2, weights=distance ............................
[CV] n_neighbors=6, p=3, weights=distance ............................
[CV] ............. n_neighbors=6, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=3, weights=distance ............................
[CV] ............. n_neighbors=4, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=6, p=3, weights=distance ............................
[CV] ............. n_neighbors=4, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=6, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=4, weights=distance, total=   0.4s
[CV] ............. n_neighbors=6, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=6, p=4, weights=distance ............................
[CV] ............. n_neighbors=4, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=6, p=4, weights=distance ............................
[CV] n_neighbors=6, p=5, weights=distance ............................
[CV] ............. n_neighbors=4, p=5, weights=distance, total=   0.3s
[CV] n_neighbors=6, p=5, weights=distance ............................
[CV] ............. n_neighbors=4, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=1, weights=distance ............................
[CV] ............. n_neighbors=7, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=7, p=1, weights=distance ............................
[CV] ............. n_neighbors=7, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=7, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=7, p=1, weights=distance ............................
[CV] ............. n_neighbors=5, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=2, weights=distance ............................
[CV] ............. n_neighbors=5, p=3, weights=distance, total=   0.4s
[CV] ............. n_neighbors=5, p=5, weights=distance, total=   0.4s
[Parallel(n_jobs=-1)]: Done 180 tasks      | elapsed:    3.3s
[CV] n_neighbors=7, p=3, weights=distance ............................
[CV] n_neighbors=7, p=3, weights=distance ............................
[CV] ............. n_neighbors=5, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=7, p=3, weights=distance ............................
[CV] ............. n_neighbors=5, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=4, weights=distance ............................
[CV] ............. n_neighbors=7, p=1, weights=distance, total=   0.2s
[CV] n_neighbors=7, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=2, weights=distance, total=   0.2s
[CV] n_neighbors=7, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=1, weights=distance, total=   0.2s
[CV] n_neighbors=7, p=4, weights=distance ............................
[CV] ............. n_neighbors=7, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=7, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=7, p=5, weights=distance ............................
[CV] ............. n_neighbors=7, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=7, p=5, weights=distance ............................
[CV] ............. n_neighbors=7, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=7, p=5, weights=distance ............................
[CV] ............. n_neighbors=6, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=3, weights=distance ............................
[CV] ............. n_neighbors=6, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=4, weights=distance ............................
[CV] ............. n_neighbors=6, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=3, weights=distance ............................
[CV] ............. n_neighbors=6, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=5, weights=distance ............................
[CV] ............. n_neighbors=6, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=4, weights=distance ............................
[CV] ............. n_neighbors=6, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=5, weights=distance ............................
[CV] ............. n_neighbors=6, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=6, p=5, weights=distance ............................
[CV] ............. n_neighbors=6, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=8, p=1, weights=distance ............................
[CV] ............. n_neighbors=8, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=1, weights=distance ............................
[CV] ............. n_neighbors=8, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=1, weights=distance ............................
[CV] ............. n_neighbors=8, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=1, weights=distance ............................
[CV] ............. n_neighbors=8, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=7, p=3, weights=distance ............................
[CV] ............. n_neighbors=8, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=7, p=3, weights=distance ............................
[CV] ............. n_neighbors=7, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=4, weights=distance ............................
[CV] ............. n_neighbors=7, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=4, weights=distance ............................
[CV] ............. n_neighbors=7, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=4, weights=distance ............................
[CV] ............. n_neighbors=7, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=5, weights=distance ............................
[CV] ............. n_neighbors=8, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=7, p=5, weights=distance ............................
[CV] ............. n_neighbors=7, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=8, p=1, weights=distance ............................
[CV] ............. n_neighbors=8, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=2, weights=distance ............................
[CV] ............. n_neighbors=8, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=2, weights=distance ............................
[CV] ............. n_neighbors=6, p=5, weights=distance, total=   0.4s
[CV] ............. n_neighbors=6, p=4, weights=distance, total=   0.4s
[CV] ............. n_neighbors=6, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=8, p=3, weights=distance ............................
[CV] n_neighbors=8, p=3, weights=distance ............................
[CV] ............. n_neighbors=6, p=5, weights=distance, total=   0.4s
[CV] ............. n_neighbors=6, p=3, weights=distance, total=   0.4s
[CV] ............. n_neighbors=6, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=8, p=4, weights=distance ............................
[CV] n_neighbors=8, p=4, weights=distance ............................
[CV] ............. n_neighbors=8, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=4, weights=distance ............................
[CV] n_neighbors=8, p=5, weights=distance ............................
[CV] n_neighbors=8, p=5, weights=distance ............................
[CV] ............. n_neighbors=8, p=2, weights=distance, total=   0.0s
[CV] n_neighbors=8, p=3, weights=distance ............................
[CV] ............. n_neighbors=6, p=3, weights=distance, total=   0.4s
[CV] n_neighbors=9, p=1, weights=distance ............................
[CV] ............. n_neighbors=9, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=9, p=1, weights=distance ............................
[CV] ............. n_neighbors=9, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=9, p=1, weights=distance ............................
[CV] ............. n_neighbors=9, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=9, p=1, weights=distance ............................
[CV] ............. n_neighbors=9, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=9, p=1, weights=distance ............................
[CV] ............. n_neighbors=9, p=1, weights=distance, total=   0.0s
[CV] n_neighbors=9, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=9, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=3, weights=distance, total=   0.4s
[CV] ............. n_neighbors=7, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=9, p=2, weights=distance ............................
[CV] ............. n_neighbors=7, p=3, weights=distance, total=   0.4s
[CV] ............. n_neighbors=7, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=9, p=3, weights=distance ............................
[CV] ............. n_neighbors=7, p=4, weights=distance, total=   0.4s
[CV] n_neighbors=9, p=3, weights=distance ............................
[CV] n_neighbors=9, p=3, weights=distance ............................
[CV] ............. n_neighbors=7, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=9, p=4, weights=distance ............................
[CV] n_neighbors=9, p=4, weights=distance ............................
[CV] ............. n_neighbors=9, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=9, p=5, weights=distance ............................
[CV] ............. n_neighbors=9, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=9, p=2, weights=distance ............................
[CV] ............. n_neighbors=9, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=9, p=2, weights=distance ............................
[CV] ............. n_neighbors=9, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=9, p=5, weights=distance ............................
[CV] ............. n_neighbors=8, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=8, p=5, weights=distance ............................
[CV] ............. n_neighbors=8, p=5, weights=distance, total=   0.4s
[CV] n_neighbors=8, p=5, weights=distance ............................
[CV] ............. n_neighbors=8, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=8, p=4, weights=distance ............................
[CV] ............. n_neighbors=8, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=8, p=4, weights=distance ............................
[CV] ............. n_neighbors=8, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=8, p=3, weights=distance ............................
[CV] ............. n_neighbors=8, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=8, p=3, weights=distance ............................
[CV] ............. n_neighbors=8, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=8, p=5, weights=distance ............................
[CV] ............. n_neighbors=9, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=9, p=5, weights=distance ............................
[CV] ............. n_neighbors=8, p=3, weights=distance, total=   0.5s
[CV] n_neighbors=9, p=5, weights=distance ............................
[CV] ............. n_neighbors=9, p=4, weights=distance, total=   0.6s
[CV] n_neighbors=9, p=4, weights=distance ............................
[CV] ............. n_neighbors=9, p=3, weights=distance, total=   0.6s
[CV] n_neighbors=9, p=3, weights=distance ............................
[CV] ............. n_neighbors=9, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=9, p=5, weights=distance ............................
[CV] ............. n_neighbors=9, p=3, weights=distance, total=   0.6s
[CV] n_neighbors=9, p=4, weights=distance ............................
[CV] ............. n_neighbors=9, p=4, weights=distance, total=   0.6s
[CV] n_neighbors=9, p=4, weights=distance ............................
[CV] ............. n_neighbors=9, p=3, weights=distance, total=   0.6s
[CV] n_neighbors=9, p=3, weights=distance ............................
[CV] ............. n_neighbors=8, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=10, p=1, weights=distance ...........................
[CV] ............. n_neighbors=9, p=5, weights=distance, total=   0.5s
[CV] ............. n_neighbors=8, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=10, p=1, weights=distance ...........................
[CV] n_neighbors=10, p=1, weights=distance ...........................
[CV] ............. n_neighbors=8, p=4, weights=distance, total=   0.5s
[CV] ............. n_neighbors=8, p=5, weights=distance, total=   0.5s
[CV] n_neighbors=10, p=1, weights=distance ...........................
[CV] ............. n_neighbors=8, p=4, weights=distance, total=   0.5s
[CV] n_neighbors=10, p=1, weights=distance ...........................
[CV] n_neighbors=10, p=2, weights=distance ...........................
[CV] ............. n_neighbors=9, p=5, weights=distance, total=   0.5s
[CV] ............. n_neighbors=8, p=3, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=10, p=2, weights=distance ...........................
[CV] ............. n_neighbors=8, p=3, weights=distance, total=   0.6s
[CV] n_neighbors=10, p=2, weights=distance ...........................
[CV] n_neighbors=10, p=2, weights=distance ...........................
[CV] n_neighbors=10, p=2, weights=distance ...........................
[CV] ............. n_neighbors=9, p=5, weights=distance, total=   0.5s
[CV] ............ n_neighbors=10, p=1, weights=distance, total=   0.1s
[CV] ............ n_neighbors=10, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=10, p=3, weights=distance ...........................
[CV] n_neighbors=10, p=3, weights=distance ...........................
[CV] n_neighbors=10, p=3, weights=distance ...........................
[CV] ............ n_neighbors=10, p=1, weights=distance, total=   0.1s
[CV] ............ n_neighbors=10, p=1, weights=distance, total=   0.1s
[CV] n_neighbors=10, p=3, weights=distance ...........................
[CV] ............ n_neighbors=10, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=10, p=3, weights=distance ...........................
[CV] n_neighbors=10, p=4, weights=distance ...........................
[Parallel(n_jobs=-1)]: Done 269 out of 300 | elapsed:    5.0s remaining:    0.6s
[CV] ............ n_neighbors=10, p=2, weights=distance, total=   0.1s
[CV] ............ n_neighbors=10, p=2, weights=distance, total=   0.1s
[CV] ............ n_neighbors=10, p=2, weights=distance, total=   0.1s
[CV] ............ n_neighbors=10, p=2, weights=distance, total=   0.1s
[CV] n_neighbors=10, p=4, weights=distance ...........................
[CV] n_neighbors=10, p=4, weights=distance ...........................
[CV] n_neighbors=10, p=4, weights=distance ...........................
[CV] n_neighbors=10, p=4, weights=distance ...........................
[CV] ............. n_neighbors=9, p=5, weights=distance, total=   0.6s
[CV] n_neighbors=10, p=5, weights=distance ...........................
[CV] ............. n_neighbors=9, p=4, weights=distance, total=   0.6s
[CV] ............. n_neighbors=9, p=4, weights=distance, total=   0.6s
[CV] n_neighbors=10, p=5, weights=distance ...........................
[CV] n_neighbors=10, p=5, weights=distance ...........................
[CV] ............. n_neighbors=9, p=4, weights=distance, total=   0.6s
[CV] ............. n_neighbors=9, p=3, weights=distance, total=   0.6s
[CV] n_neighbors=10, p=5, weights=distance ...........................
[CV] n_neighbors=10, p=5, weights=distance ...........................
[CV] ............. n_neighbors=9, p=3, weights=distance, total=   0.7s
[CV] ............ n_neighbors=10, p=3, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=3, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=3, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=4, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=4, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=3, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=3, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=4, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=4, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=4, weights=distance, total=   0.6s
[CV] ............ n_neighbors=10, p=5, weights=distance, total=   0.4s
[CV] ............ n_neighbors=10, p=5, weights=distance, total=   0.4s
[CV] ............ n_neighbors=10, p=5, weights=distance, total=   0.4s
[CV] ............ n_neighbors=10, p=5, weights=distance, total=   0.4s
[CV] ............ n_neighbors=10, p=5, weights=distance, total=   0.4s
5.793163790999998

1. 向量空间余弦相似度 Cosine Similarity
3. 皮尔森相关系数 Pearson Correlation Coefficient
4. Jaccard相关系数 Jaccard Coefficient

# 线性回归法

1. 解决回归问题
2. 思想简单，实现容易
3. 许多强大的非线形模型的基础
4. 结果具有很好的可解释性
5. 蕴含机器学习中的很多重要思想

1. 线性回归
2. 多项式回归
3. 逻辑回归
4. SVM
5. 神经网络

最小二乘法的解为 $a = \frac{\sum_i (x_i-\bar{x})(y_i-\bar{y})}{\sum_i (x_i-\bar{x})^2}$，$b = \bar{y} - a\bar{x}$，它们的推导也是通过偏导数来进行的，具体参考高等数学整理(二)，这里的 $\bar{x}$、$\bar{y}$ 指的是x、y数据集的平均值。

import numpy as np

class SimpleLinearRegression:
    """Univariate least-squares linear regression: y = a * x + b."""

    def __init__(self):
        # Slope (_a) and intercept (_b) are unknown until fit() is called.
        self._a = None
        self._b = None

    def fit(self, x_train, y_train):
        """Estimate slope and intercept from 1-D training data; return self."""
        assert x_train.ndim == 1, \
            "简单线性回归仅能处理有1个特征的训练数据"
        assert len(x_train) == len(y_train), \
            "x_train的长度必须与y_train的长度相等"
        # Accumulate the least-squares numerator/denominator in a plain loop
        # (this is the deliberately non-vectorized reference implementation).
        mean_x = np.mean(x_train)
        mean_y = np.mean(y_train)
        numerator = 0
        denominator = 0
        for xi, yi in zip(x_train, y_train):
            dx = xi - mean_x
            numerator += dx * (yi - mean_y)
            denominator += dx * dx
        self._a = numerator / denominator
        self._b = mean_y - self._a * mean_x
        return self

    def predict(self, x_predict):
        """Return predictions for each value in a 1-D input array."""
        assert x_predict.ndim == 1, \
            "简单线性回归仅能处理有1个特征的训练数据"
        assert self._a is not None and self._b is not None, \
            "开始预测前必须fit"
        return np.array([self._predict(v) for v in x_predict])

    def _predict(self, x_single):
        # Prediction for one scalar input.
        return self._a * x_single + self._b

    def __repr__(self):
        return "SimpleLinearRegression()"
from playLA.SimpleLinearRegression import SimpleLinearRegression
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":

    # Fit the hand-rolled regressor on a tiny toy dataset.
    xs = np.array([1, 2, 3, 4, 5])
    ys = np.array([1, 3, 2, 3, 5])
    model = SimpleLinearRegression()
    model.fit(xs, ys)
    # Predict for x = 6 and show the learned slope/intercept.
    x_predict = 6
    print(model.predict(np.array([x_predict])))
    print(model._a)
    print(model._b)
    # Visualize the samples together with the fitted line.
    fitted = model.predict(xs)
    plt.scatter(xs, ys)
    plt.plot(xs, fitted, color="r")
    plt.axis([0, 6, 0, 6])
    plt.show()


[5.2]
0.8
0.39999999999999947

import numpy as np

class SimpleLinearRegressionVector:
    """Univariate linear regression fitted with vectorized NumPy operations."""

    def __init__(self):
        # Slope (_a) and intercept (_b) are set by fit().
        self._a = None
        self._b = None

    def fit(self, x_train, y_train):
        """Fit y = a * x + b by least squares; return self."""
        assert x_train.ndim == 1, \
            "简单线性回归仅能处理有1个特征的训练数据"
        assert len(x_train) == len(y_train), \
            "x_train的长度必须与y_train的长度相等"
        # Center both series on their means, then solve with two dot products
        # instead of an explicit Python loop.
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        x_centered = x_train - x_mean
        y_centered = y_train - y_mean
        self._a = x_centered.dot(y_centered) / x_centered.dot(x_centered)
        self._b = y_mean - self._a * x_mean
        return self

    def predict(self, x_predict):
        """Return predictions for each value in a 1-D input array."""
        assert x_predict.ndim == 1, \
            "简单线性回归仅能处理有1个特征的训练数据"
        assert self._a is not None and self._b is not None, \
            "开始预测前必须fit"
        return np.array([self._predict(v) for v in x_predict])

    def _predict(self, x_single):
        # Prediction for one scalar input.
        return self._a * x_single + self._b

    def __repr__(self):
        return "SimpleLinearRegressionVector()"
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":

    # Same toy-data experiment, but with the vectorized regressor.
    xs = np.array([1, 2, 3, 4, 5])
    ys = np.array([1, 3, 2, 3, 5])
    model = SimpleLinearRegressionVector()
    model.fit(xs, ys)
    x_predict = 6
    print(model.predict(np.array([x_predict])))
    print(model._a)
    print(model._b)
    # Plot the samples and the fitted line.
    fitted = model.predict(xs)
    plt.scatter(xs, ys)
    plt.plot(xs, fitted, color="r")
    plt.axis([0, 6, 0, 6])
    plt.show()


from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from playLA.SimpleLinearRegression import SimpleLinearRegression
import numpy as np
import timeit

if __name__ == "__main__":

    # One million synthetic samples: y = 2x + 3 plus Gaussian noise.
    m = 1000000
    big_x = np.random.random(size=m)
    big_y = big_x * 2 + 3 + np.random.normal(size=m)
    loop_reg = SimpleLinearRegression()
    vec_reg = SimpleLinearRegressionVector()
    # Time the pure-Python loop implementation...
    t0 = timeit.default_timer()
    loop_reg.fit(big_x, big_y)
    print(timeit.default_timer() - t0)
    # ...against the vectorized implementation on the same data.
    t0 = timeit.default_timer()
    vec_reg.fit(big_x, big_y)
    print(timeit.default_timer() - t0)
    # Both should recover essentially the same parameters.
    print(loop_reg._a)
    print(loop_reg._b)
    print(vec_reg._a)
    print(vec_reg._b)

0.870928624
0.00943964399999997
2.0010333897209
2.998210238026
2.0010333897208623
2.9982102380260187

，这样量纲和y的量纲是一致的，这个衡量标准被称为均方根误差RMSE(Root Mean Squared Error)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Print the dataset's full description text.
    print(boston.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**

:Number of Instances: 506

:Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

:Attribute Information (in order):
- CRIM     per capita crime rate by town
- ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
- INDUS    proportion of non-retail business acres per town
- CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- NOX      nitric oxides concentration (parts per 10 million)
- RM       average number of rooms per dwelling
- AGE      proportion of owner-occupied units built prior to 1940
- DIS      weighted distances to five Boston employment centres
- TAX      full-value property-tax rate per $10,000
- PTRATIO  pupil-teacher ratio by town
- B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
- LSTAT    % lower status of the population
- MEDV     Median value of owner-occupied homes in $1000's

:Missing Attribute Values: None

:Creator: Harrison, D. and Rubinfeld, D.L.

This is a copy of UCI ML housing dataset.
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/

This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.

The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic
prices and the demand for clean air', J. Environ. Economics & Management,
vol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics
...', Wiley, 1980.   N.B. Various transformations are used in the table on
pages 244-261 of the latter.

The Boston house-price data has been used in many machine learning papers that address regression
problems.

.. topic:: References

- Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.
- Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.


import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Show the names of the 13 predictive attributes.
    print(boston.feature_names)

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
'B' 'LSTAT']

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)

(506,)
(506,)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Scatter rooms vs. price to eyeball the linear trend.
    plt.scatter(x, y)
    plt.show()

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # The maximum price (50.0) reveals the dataset's price ceiling.
    print(np.max(y))

(506,)
(506,)
50.0

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling (x first, while y is intact).
    x = x[y < 50]
    y = y[y < 50]
    plt.scatter(x, y)
    plt.show()

import numpy as np

def train_test_split(X, y, test_ratio=0.2, seed=None):
    """Randomly split X and y into train/test subsets.

    Parameters:
        X: sample matrix, one row per sample.
        y: label vector, aligned with the rows of X.
        test_ratio: fraction of samples assigned to the test set (0.0-1.0).
        seed: optional RNG seed for a reproducible split.

    Returns:
        X_train, X_test, y_train, y_test
    """
    assert X.shape[0] == y.shape[0], \
        "the size of X must be equal to the size of y"
    assert 0.0 <= test_ratio <= 1.0, \
        "test_ratio must be valid"

    # BUG FIX: `if seed:` silently ignored a legitimate seed of 0.
    if seed is not None:
        np.random.seed(seed)

    # Shuffle all indices, then carve the first test_size off as the test set.
    shuffled_indexes = np.random.permutation(len(X))
    test_size = int(len(X) * test_ratio)
    test_indexes = shuffled_indexes[:test_size]
    train_indexes = shuffled_indexes[test_size:]

    X_train = X[train_indexes]
    y_train = y[train_indexes]

    X_test = X[test_indexes]
    y_test = y[test_indexes]

    return X_train, X_test, y_train, y_test
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    # 80/20 reproducible split via the hand-rolled helper.
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)

(506,)
(506,)
(392,)
(98,)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    # Fit the vectorized simple regressor on the training split.
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    # Plot training points together with the fitted line.
    plt.scatter(x_train, y_train)
    plt.plot(x_train, reg.predict(x_train), color="r")
    plt.show()

(506,)
(506,)
(392,)
(98,)
7.8608543562689555
-27.459342806705543

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    # Evaluate on the held-out test set.
    y_predict = reg.predict(x_test)
    # Mean squared error.
    mse_test = np.sum((y_predict - y_test)**2) / len(y_test)
    print(mse_test)
    # RMSE restores the original units of y.
    rmse_test = sqrt(mse_test)
    print(rmse_test)
    # Mean absolute error.
    mae_test = np.sum(np.absolute(y_predict - y_test)) / len(y_test)
    print(mae_test)

(506,)
(506,)
(392,)
(98,)
7.8608543562689555
-27.459342806705543
24.156602134387438
4.914936635846635
3.5430974409463873

import numpy as np
from math import sqrt

def accuracy_score(y_true, y_predict):
    """Fraction of predictions that exactly match the true labels."""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    # Elementwise equality averaged over all samples.
    return np.mean(y_true == y_predict)

def mean_squared_error(y_true, y_predict):
    """Mean of the squared differences between y_true and y_predict (MSE)."""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    errors = y_true - y_predict
    return np.mean(errors ** 2)

def root_mean_squared_error(y_true, y_predict):
    """Square root of the MSE (RMSE), restoring the original units of y."""

    return mean_squared_error(y_true, y_predict) ** 0.5

def mean_absolute_error(y_true, y_predict):
    """Mean of the absolute differences between y_true and y_predict (MAE)."""

    return np.mean(np.absolute(y_true - y_predict))
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt
from playLA.metrics import *

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    y_predict = reg.predict(x_test)
    # Evaluate with the shared metrics helpers instead of inline formulas.
    print(mean_squared_error(y_test, y_predict))
    print(root_mean_squared_error(y_test, y_predict))
    print(mean_absolute_error(y_test, y_predict))

(506,)
(506,)
(392,)
(98,)
7.8608543562689555
-27.459342806705543
24.156602134387438
4.914936635846635
3.5430974409463873

scikit_learn机器学习库中的MSE和MAE

scikit_learn中没有RMSE,所以我们要对MSE开根号就可以了

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt
# from playLA.metrics import *
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    y_predict = reg.predict(x_test)
    # scikit-learn provides MSE/MAE; there is no RMSE helper, so take
    # the square root of MSE by hand.
    print(mean_squared_error(y_test, y_predict))
    print(sqrt(mean_squared_error(y_test, y_predict)))
    print(mean_absolute_error(y_test, y_predict))

(506,)
(506,)
(392,)
(98,)
7.8608543562689555
-27.459342806705543
24.156602134387438
4.914936635846635
3.5430974409463873

RMSE vs MAE

RMSE和MAE的量纲都是一样的，但是我们在实际的实践中会发现RMSE会比MAE要大，这是因为RMSE是将错误值平方后再开根，如果有两个样本的错误值是100，在经过平方操作后，这个100的差距就扩大到了10000的差距，也就是说RMSE有放大我们预测结果和真实结果较大差距的趋势；而MAE是没有这样一个趋势的，它直接就反映的是样本的预测结果和真实结果的这一个差距，没有平方操作。也正是因为这个原因，从某种程度上来说，我们尽量的让RMSE这个值更加小，相对来讲它的意义更大一些，因为这背后就意味着我们整个样本的错误中，那个最大的错误值相应的比较小。

1. R^2 <= 1
2. R^2越大越好。当我们的预测模型不犯任何错误，R^2得到最大值1
3. 当我们的模型等于基准模型时，R^2为0
4. 如果R^2 < 0，说明我们学习到的模型还不如基准模型。此时，很有可能我们的数据不存在任何线性关系。

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt
# from playLA.metrics import *
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    y_predict = reg.predict(x_test)
    print(mean_squared_error(y_test, y_predict))
    print(sqrt(mean_squared_error(y_test, y_predict)))
    print(mean_absolute_error(y_test, y_predict))
    # R Square: 1 - MSE / Var(y), computed inline.
    print(1 - mean_squared_error(y_test, y_predict) / np.var(y_test))

(506,)
(506,)
(392,)
(98,)
7.8608543562689555
-27.459342806705543
24.156602134387438
4.914936635846635
3.5430974409463873
0.6129316803937322

import numpy as np
from math import sqrt

def accuracy_score(y_true, y_predict):
    """Fraction of predictions that exactly match the true labels."""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    # Elementwise equality averaged over all samples.
    return np.mean(y_true == y_predict)

def mean_squared_error(y_true, y_predict):
    """Mean of the squared differences between y_true and y_predict (MSE)."""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"

    errors = y_true - y_predict
    return np.mean(errors ** 2)

def root_mean_squared_error(y_true, y_predict):
    """Square root of the MSE (RMSE), restoring the original units of y."""

    return mean_squared_error(y_true, y_predict) ** 0.5

def mean_absolute_error(y_true, y_predict):
    """Mean of the absolute differences between y_true and y_predict (MAE)."""

    return np.mean(np.absolute(y_true - y_predict))

def r2_score(y_true, y_predict):
    """R^2: one minus the ratio of model MSE to the variance of y_true."""
    residual_mse = mean_squared_error(y_true, y_predict)
    return 1 - residual_mse / np.var(y_true)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt
# from playLA.metrics import *
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from playLA.metrics import r2_score

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    y_predict = reg.predict(x_test)
    print(mean_squared_error(y_test, y_predict))
    print(sqrt(mean_squared_error(y_test, y_predict)))
    print(mean_absolute_error(y_test, y_predict))
    # R Square via the shared playLA metrics helper.
    print(r2_score(y_test, y_predict))

(506,)
(506,)
(392,)
(98,)
7.8608543562689555
-27.459342806705543
24.156602134387438
4.914936635846635
3.5430974409463873
0.6129316803937322

scikit_learn中同样也有这个方法

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt
# from playLA.metrics import *
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# from playLA.metrics import r2_score
from sklearn.metrics import r2_score

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    y_predict = reg.predict(x_test)
    print(mean_squared_error(y_test, y_predict))
    print(sqrt(mean_squared_error(y_test, y_predict)))
    print(mean_absolute_error(y_test, y_predict))
    # R Square via scikit-learn's implementation this time.
    print(r2_score(y_test, y_predict))

import numpy as np
from .metrics import r2_score

class SimpleLinearRegressionVector:
    """Univariate linear regression (vectorized fit) with an R^2 score method."""

    def __init__(self):
        # Slope (_a) and intercept (_b) are set by fit().
        self._a = None
        self._b = None

    def fit(self, x_train, y_train):
        """Fit y = a * x + b by least squares; return self."""
        assert x_train.ndim == 1, \
            "简单线性回归仅能处理有1个特征的训练数据"
        assert len(x_train) == len(y_train), \
            "x_train的长度必须与y_train的长度相等"
        # Center both series, then solve with two dot products.
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        x_centered = x_train - x_mean
        y_centered = y_train - y_mean
        self._a = x_centered.dot(y_centered) / x_centered.dot(x_centered)
        self._b = y_mean - self._a * x_mean
        return self

    def predict(self, x_predict):
        """Return predictions for each value in a 1-D input array."""
        assert x_predict.ndim == 1, \
            "简单线性回归仅能处理有1个特征的训练数据"
        assert self._a is not None and self._b is not None, \
            "开始预测前必须fit"
        return np.array([self._predict(v) for v in x_predict])

    def _predict(self, x_single):
        # Prediction for one scalar input.
        return self._a * x_single + self._b

    def score(self, x_test, y_test):
        """R^2 of the model's predictions on a held-out test set."""
        return r2_score(y_test, self.predict(x_test))

    def __repr__(self):
        return "SimpleLinearRegressionVector()"
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.SimpleLinearRegressionVector import SimpleLinearRegressionVector
from math import sqrt
# from playLA.metrics import *
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# from playLA.metrics import r2_score
from sklearn.metrics import r2_score

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    # Use only the average-number-of-rooms feature (column 5).
    x = boston.data[:, 5]
    print(x.shape)
    y = boston.target
    print(y.shape)
    # Drop samples clipped at the $50k price ceiling.
    x = x[y < 50]
    y = y[y < 50]
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
    print(x_train.shape)
    print(x_test.shape)
    reg = SimpleLinearRegressionVector()
    reg.fit(x_train, y_train)
    print(reg._a)
    print(reg._b)
    y_predict = reg.predict(x_test)
    print(mean_squared_error(y_test, y_predict))
    print(sqrt(mean_squared_error(y_test, y_predict)))
    print(mean_absolute_error(y_test, y_predict))
    # R Square via the model's own score() method.
    print(reg.score(x_test, y_test))

，则

，其中为截距(intercept)，为系数(coefficients)。在最终报告给我们的用户相应的结果的时候，有可能不是将整体报告给用户，而是将截距和系数分开。这样做的原因是在系数部分，每一个值都对应原来样本中的一个特征，这些系数可以用来描述特征，对于最终样本相应的贡献程度是怎样的；而截距和我们的样本的特征是不相干的，它只是一个偏移，所以我们把这两部分分开。

import numpy as np
from numpy.linalg import inv
from .metrics import r2_score

class LinearRegression:
    """Multivariate linear regression solved with the normal equation."""

    def __init__(self):
        # coef: per-feature coefficients; interception: bias term;
        # _theta: full parameter vector [intercept, coefficients...].
        self.coef = None
        self.interception = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Solve theta = (X_b^T X_b)^-1 X_b^T y in closed form; return self."""
        assert X_train.shape[0] == y_train.shape[0], \
            "X_train的列数必须等于y_train的长度"
        # Prepend a column of ones so theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
        self.interception = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Predict targets for a 2-D feature matrix."""
        assert self.interception is not None and self.coef is not None, \
            "开始预测前必须fit"
        assert X_predict.shape[1] == len(self.coef), \
            "预测的特征数必须与训练的特征数相等"
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """R^2 of the model's predictions on a held-out test set."""
        return r2_score(y_test, self.predict(X_test))

    def __repr__(self):
        return "LinearRegression()"

from sklearn import datasets

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples clipped at the $50k price ceiling (X first, while y is intact).
    X = X[y < 50]
    y = y[y < 50]
    print(X.shape)

(490, 13)

from sklearn import datasets
from playLA.model_selection import train_test_split
from playLA.LinearRegression import LinearRegression

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples clipped at the $50k price ceiling.
    X = X[y < 50]
    y = y[y < 50]
    print(X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    # Fit the hand-rolled normal-equation regressor on all 13 features.
    reg = LinearRegression()
    reg.fit_normal(X_train, y_train)
    print(reg.coef)
    print(reg.interception)
    print(reg.score(X_test, y_test))

(490, 13)
[-1.20354261e-01  3.64423279e-02 -3.61493155e-02  5.12978140e-02
-1.15775825e+01  3.42740062e+00 -2.32311760e-02 -1.19487594e+00
2.60101728e-01 -1.40219119e-02 -8.35430488e-01  7.80472852e-03
-3.80923751e-01]
34.11739972320593
0.8129794056212907

from sklearn import datasets
# from playLA.model_selection import train_test_split
from sklearn.model_selection import train_test_split
# from playLA.LinearRegression import LinearRegression
from sklearn.linear_model import LinearRegression

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples clipped at the $50k price ceiling.
    X = X[y < 50]
    y = y[y < 50]
    print(X.shape)
    # scikit-learn's splitter uses random_state instead of seed.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    # sklearn exposes coef_ / intercept_ (trailing underscore convention).
    print(reg.coef_)
    print(reg.intercept_)
    print(reg.score(X_test, y_test))

(490, 13)
[-1.15625837e-01  3.13179564e-02 -4.35662825e-02 -9.73281610e-02
-1.09500653e+01  3.49898935e+00 -1.41780625e-02 -1.06249020e+00
2.46031503e-01 -1.23291876e-02 -8.79440522e-01  8.31653623e-03
-3.98593455e-01]
32.59756158869959
0.8009390227581041

from sklearn import datasets
from playLA.model_selection import train_test_split
# from sklearn.model_selection import train_test_split
# from playLA.LinearRegression import LinearRegression
from sklearn.linear_model import LinearRegression

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples clipped at the $50k price ceiling.
    X = X[y < 50]
    y = y[y < 50]
    print(X.shape)
    # playLA's splitter (seed=) paired with sklearn's LinearRegression,
    # to compare against the hand-rolled fit on the identical split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    reg = LinearRegression()
    reg.fit(X_train, y_train)
    print(reg.coef_)
    print(reg.intercept_)
    print(reg.score(X_test, y_test))

(490, 13)
[-1.20354261e-01  3.64423279e-02 -3.61493155e-02  5.12978140e-02
-1.15775825e+01  3.42740062e+00 -2.32311760e-02 -1.19487594e+00
2.60101728e-01 -1.40219119e-02 -8.35430488e-01  7.80472852e-03
-3.80923751e-01]
34.117399723229845
0.8129794056212809

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression

if __name__ == "__main__":

    # BUG FIX: `boston` was used without ever being defined (NameError).
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples clipped at the $50k price ceiling.
    X = X[y < 50]
    y = y[y < 50]
    reg = LinearRegression()
    # Fit on the full dataset (no train/test split) just to inspect coefficients.
    reg.fit(X, y)
    print(reg.coef_)
    # Rank the features by their fitted coefficient, most negative first.
    print(np.argsort(reg.coef_))
    # Map the ranking back to feature names.
    print(boston.feature_names[np.argsort(reg.coef_)])
    print(boston.DESCR)

[-1.06715912e-01  3.53133180e-02 -4.38830943e-02  4.52209315e-01
-1.23981083e+01  3.75945346e+00 -2.36790549e-02 -1.21096549e+00
2.51301879e-01 -1.37774382e-02 -8.38180086e-01  7.85316354e-03
-3.50107918e-01]
[ 4  7 10 12  0  2  6  9 11  1  8  3  5]
['NOX' 'DIS' 'PTRATIO' 'LSTAT' 'CRIM' 'INDUS' 'AGE' 'TAX' 'B' 'ZN' 'RAD'
'CHAS' 'RM']
.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**

:Number of Instances: 506

:Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

:Attribute Information (in order):
- CRIM     per capita crime rate by town
- ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
- INDUS    proportion of non-retail business acres per town
- CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
- NOX      nitric oxides concentration (parts per 10 million)
- RM       average number of rooms per dwelling
- AGE      proportion of owner-occupied units built prior to 1940
- DIS      weighted distances to five Boston employment centres
- TAX      full-value property-tax rate per $10,000
- PTRATIO  pupil-teacher ratio by town
- B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
- LSTAT    % lower status of the population
- MEDV     Median value of owner-occupied homes in $1000's

:Missing Attribute Values: None

:Creator: Harrison, D. and Rubinfeld, D.L.

This is a copy of UCI ML housing dataset.
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/

This dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.

The Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic
prices and the demand for clean air', J. Environ. Economics & Management,
vol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics
...', Wiley, 1980.   N.B. Various transformations are used in the table on
pages 244-261 of the latter.

The Boston house-price data has been used in many machine learning papers that address regression
problems.

.. topic:: References

- Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.
- Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.


# 梯度下降法

1. 本身不是一个机器学习算法
2. 是一种基于搜索的最优化方法
3. 作用：最小化一个损失函数
4. 梯度上升法：最大化一个效用函数

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":

    # Reproducible synthetic data: 100 x-values in [0, 2).
    np.random.seed(666)
    x = 2 * np.random.random(size=100)
    # Noisy line y = 3x + 4.
    y = x * 3 + 4 + np.random.normal(size=100)
    # Reshape into a single-feature design matrix (100 rows, 1 column).
    X = x.reshape(-1, 1)
    print(X.shape)
    plt.scatter(x, y)
    plt.show()

(100, 1)

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":

    # Reproducible synthetic data: 100 x-values in [0, 2), y = 3x + 4 + noise.
    np.random.seed(666)
    x = 2 * np.random.random(size=100)
    y = x * 3 + 4 + np.random.normal(size=100)
    # Single-feature design matrix (100 rows, 1 column).
    X = x.reshape(-1, 1)
    print(X.shape)

    def J(theta, X_b, y):
        # MSE loss; report inf on numeric overflow so descent can bail out.
        try:
            return np.sum((y - X_b.dot(theta))**2) / len(X_b)
        except:
            return float('inf')

    def dJ(theta, X_b, y):
        # Analytic gradient of J with respect to theta.
        res = np.empty(len(theta))
        res[0] = np.sum(X_b.dot(theta) - y)
        for i in range(1, len(theta)):
            res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
        return res * 2 / len(X_b)

    def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
        """Batch gradient descent.

        :param X_b: feature matrix with a leading column of ones
        :param y: target vector
        :param initial_theta: starting parameter vector
        :param eta: learning rate / step size
        :param n_iters: maximum number of iterations
        :param epsilon: convergence threshold on the loss difference
        :return: the fitted theta
        """
        theta = initial_theta
        i_iter = 0
        while i_iter < n_iters:
            # BUG FIX: `gradient` was referenced below but never computed,
            # which raised NameError on the first iteration.
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            # Step downhill along the gradient.
            theta = theta - eta * gradient
            # Stop when successive losses differ by less than epsilon.
            if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            i_iter += 1
        return theta

    # Prepend the ones column so theta[0] is the intercept, then fit.
    X_b = np.hstack([np.ones((len(X), 1)), X.reshape(-1, 1)])
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
    theta = gradient_descent(X_b, y, initial_theta, eta)
    print(theta)


(100, 1)
[4.02145786 3.00706277]

import numpy as np
from numpy.linalg import inv
from .metrics import r2_score

class LinearRegression:

def __init__(self):
    """Initialize an unfitted LinearRegression model."""
    # Per-feature coefficients, intercept, and the full parameter vector.
    self.coef = None
    self.interception = None
    self._theta = None

def fit_normal(self, X_train, y_train):
# 根据训练数据集X_train, y_train训练Linear Regression模型
assert X_train.shape[0] == y_train.shape[0], \
"X_train的列数必须等于y_train的长度"
X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
self._theta = inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
self.interception = self._theta[0]
self.coef = self._theta[1:]
return self

def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
# 根据训练数据集X_train,y_train，使用梯度下降法训练Linear Regression模型
assert X_train.shape[0] == y_train.shape[0], \
"X_train的列数必须等于y_train的长度"

def J(theta, X_b, y):
# 构建损失函数
try:
return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
except:
return float('inf')

def dJ(theta, X_b, y):
# 对theta求偏导数，获取梯度向量
res = np.empty(len(theta))
res[0] = np.sum(X_b.dot(theta) - y)
for i in range(1, len(theta)):
res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
return res * 2 / len(X_b)

def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
"""
梯度下降算法
:param X_b: 带虚拟特征1的自变量特征矩阵
:param y: 因变量向量
:param initial_theta: 初始的常数向量，这里需要注意的是真正待求的是常数向量，求偏导的也是常数向量
:param eta: 迭代步长、学习率
:param n_iters: 最大迭代次数
:param epsilon: 误差值
:return:
"""
theta = initial_theta
# 真实迭代次数
i_iter = 0
while i_iter < n_iters:
# 获取梯度
last_theta = theta
# 迭代更新theta，不断顺着梯度方向寻找新的theta
theta = theta - eta * gradient
# 计算前后两次迭代后的损失函数差值的绝对值
if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
break
# 更新迭代次数
i_iter += 1
return theta

X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
initial_theta = np.zeros(X_b.shape[1])
self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
self.interception = self._theta[0]
self.coef = self._theta[1:]
return self

def predict(self, X_predict):
# 给定待预测数据集X_predict,返回表示X_predict的结果向量
assert self.interception is not None and self.coef is not None, \
"开始预测前必须fit"
assert X_predict.shape[1] == len(self.coef), \
"预测的特征数必须与训练的特征数相等"
X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
return X_b.dot(self._theta)

def score(self, X_test, y_test):
# 根据测试数据集X_test和y_test确定当前模型的准确度
y_predict = self.predict(X_test)
return r2_score(y_test, y_predict)

def __repr__(self):
return "LinearRegression()"
import numpy as np
import matplotlib.pyplot as plt
from playLA.LinearRegression import LinearRegression

if __name__ == "__main__":
    # Reproducible 100 samples in [0, 2).
    np.random.seed(666)
    x = 2 * np.random.random(size=100)
    # Noisy linear relation: y = 3x + 4 + N(0, 1).
    y = x * 3 + 4 + np.random.normal(size=100)
    # Arrange the samples as a 100x1 design matrix.
    X = x.reshape(-1, 1)
    print(X.shape)
    # plt.scatter(x, y)
    # plt.show()

    def J(theta, X_b, y):
        # MSE loss; inf on overflow so divergence is handled gracefully.
        try:
            return np.sum((y - X_b.dot(theta))**2) / len(X_b)
        except Exception:
            return float('inf')

    def dJ(theta, X_b, y):
        # Gradient of J w.r.t. theta, component by component.
        res = np.empty(len(theta))
        res[0] = np.sum(X_b.dot(theta) - y)
        for i in range(1, len(theta)):
            res[i] = (X_b.dot(theta) - y).dot(X_b[:, i])
        return res * 2 / len(X_b)

    def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
        """Batch gradient descent; see dJ/J above for the loss definition."""
        theta = initial_theta
        i_iter = 0
        while i_iter < n_iters:
            # FIX: the gradient was referenced but never computed.
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            theta = theta - eta * gradient
            # Stop when the loss change falls below epsilon.
            if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            i_iter += 1
        return theta

    X_b = np.hstack([np.ones((len(X), 1)), X.reshape(-1, 1)])
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
    theta = gradient_descent(X_b, y, initial_theta, eta)
    print(theta)
    # Cross-check with the library implementation.
    reg = LinearRegression()
    reg.fit_gd(X, y)
    print(reg.coef)
    print(reg.interception)


(100, 1)
[4.02145786 3.00706277]
[3.00706277]
4.021457858204859

import numpy as np
from numpy.linalg import inv
from .metrics import r2_score

class LinearRegression:

    def __init__(self):
        # Model state, populated by one of the fit_* methods.
        self.coef = None          # feature coefficients
        self.interception = None  # intercept term
        self._theta = None        # full parameter vector [intercept, coefs...]

    def fit_normal(self, X_train, y_train):
        """Fit by the closed-form normal equation: theta = (X^T X)^-1 X^T y."""
        assert X_train.shape[0] == y_train.shape[0], \
            "X_train的列数必须等于y_train的长度"
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
        self.interception = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Fit by batch gradient descent.

        :param eta: learning rate
        :param n_iters: maximum number of iterations
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "X_train的列数必须等于y_train的长度"

        def J(theta, X_b, y):
            # MSE loss; inf on overflow so divergence is handled gracefully.
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
            except Exception:
                return float('inf')

        def dJ(theta, X_b, y):
            # Vectorized gradient: X_b^T (X_b theta - y) * 2 / m.
            return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
            # Iterate theta <- theta - eta * dJ until the loss stops improving.
            theta = initial_theta
            i_iter = 0
            while i_iter < n_iters:
                # FIX: the gradient was referenced but never computed.
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                # Stop when the loss change falls below epsilon.
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break
                i_iter += 1
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        self.interception = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def predict(self, X_predict):
        # Prepend the bias column and apply the learned theta.
        assert self.interception is not None and self.coef is not None, \
            "开始预测前必须fit"
        assert X_predict.shape[1] == len(self.coef), \
            "预测的特征数必须与训练的特征数相等"
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        # R^2 of the predictions on a held-out test set.
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"

import numpy as np
from sklearn import datasets
from playLA.LinearRegression import LinearRegression
from playLA.model_selection import train_test_split
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older
    # version or switch to another regression dataset.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    reg = LinearRegression()
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    reg.fit_normal(X_train, y_train)
    print(reg.score(X_test, y_test))
    # Gradient descent on the unscaled features (diverges — see output below).
    reg2 = LinearRegression()
    start_time = timeit.default_timer()
    reg2.fit_gd(X_train, y_train)
    print(reg2.coef)
    print(reg2.score(X_test, y_test))
    print(X_train[:10, :])
    print(timeit.default_timer() - start_time)

0.8129794056212907
/opt/anaconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py:90: RuntimeWarning: overflow encountered in reduce
return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
[nan nan nan nan nan nan nan nan nan nan nan nan nan]
nan
[[1.42362e+01 0.00000e+00 1.81000e+01 0.00000e+00 6.93000e-01 6.34300e+00
1.00000e+02 1.57410e+00 2.40000e+01 6.66000e+02 2.02000e+01 3.96900e+02
2.03200e+01]
[3.67822e+00 0.00000e+00 1.81000e+01 0.00000e+00 7.70000e-01 5.36200e+00
9.62000e+01 2.10360e+00 2.40000e+01 6.66000e+02 2.02000e+01 3.80790e+02
1.01900e+01]
[1.04690e-01 4.00000e+01 6.41000e+00 1.00000e+00 4.47000e-01 7.26700e+00
4.90000e+01 4.78720e+00 4.00000e+00 2.54000e+02 1.76000e+01 3.89250e+02
6.05000e+00]
[1.15172e+00 0.00000e+00 8.14000e+00 0.00000e+00 5.38000e-01 5.70100e+00
9.50000e+01 3.78720e+00 4.00000e+00 3.07000e+02 2.10000e+01 3.58770e+02
1.83500e+01]
[6.58800e-02 0.00000e+00 2.46000e+00 0.00000e+00 4.88000e-01 7.76500e+00
8.33000e+01 2.74100e+00 3.00000e+00 1.93000e+02 1.78000e+01 3.95560e+02
7.56000e+00]
[2.49800e-02 0.00000e+00 1.89000e+00 0.00000e+00 5.18000e-01 6.54000e+00
5.97000e+01 6.26690e+00 1.00000e+00 4.22000e+02 1.59000e+01 3.89960e+02
8.65000e+00]
[7.75223e+00 0.00000e+00 1.81000e+01 0.00000e+00 7.13000e-01 6.30100e+00
8.37000e+01 2.78310e+00 2.40000e+01 6.66000e+02 2.02000e+01 2.72210e+02
1.62300e+01]
[9.88430e-01 0.00000e+00 8.14000e+00 0.00000e+00 5.38000e-01 5.81300e+00
1.00000e+02 4.09520e+00 4.00000e+00 3.07000e+02 2.10000e+01 3.94540e+02
1.98800e+01]
[1.14320e-01 0.00000e+00 8.56000e+00 0.00000e+00 5.20000e-01 6.78100e+00
7.13000e+01 2.85610e+00 5.00000e+00 3.84000e+02 2.09000e+01 3.95580e+02
7.67000e+00]
[5.69175e+00 0.00000e+00 1.81000e+01 0.00000e+00 5.83000e-01 6.11400e+00
7.98000e+01 3.54590e+00 2.40000e+01 6.66000e+02 2.02000e+01 3.92680e+02
1.49800e+01]]
0.29687309100000003

import numpy as np
from sklearn import datasets
from playLA.LinearRegression import LinearRegression
from playLA.model_selection import train_test_split
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older version.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    reg = LinearRegression()
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    reg.fit_normal(X_train, y_train)
    print(reg.score(X_test, y_test))
    reg2 = LinearRegression()
    start_time = timeit.default_timer()
    # Tiny eta avoids divergence on unscaled features, at the cost of underfitting.
    reg2.fit_gd(X_train, y_train, eta=0.000001)
    print(reg2.coef)
    print(reg2.score(X_test, y_test))
    # print(X_train[:10, :])
    print(timeit.default_timer() - start_time)

0.8129794056212907
[-0.10245704  0.11535876 -0.06248791  0.00207516  0.00447152  0.11954208
0.04684195  0.03460927 -0.00452122  0.00324507  0.1271939   0.04484862
-0.22542441]
0.27586818724477224
0.301082199

import numpy as np
from sklearn import datasets
from playLA.LinearRegression import LinearRegression
from playLA.model_selection import train_test_split
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older version.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    reg = LinearRegression()
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    reg.fit_normal(X_train, y_train)
    print(reg.score(X_test, y_test))
    reg2 = LinearRegression()
    start_time = timeit.default_timer()
    # Tiny eta compensated by many iterations — slow but converges further.
    reg2.fit_gd(X_train, y_train, eta=0.000001, n_iters=1e6)
    print(reg2.coef)
    print(reg2.score(X_test, y_test))
    # print(X_train[:10, :])
    print(timeit.default_timer() - start_time)

0.8129794056212907
[-1.07889200e-01  5.91494760e-02 -5.72920411e-02  1.19334353e-01
2.07223623e-01  3.91254775e+00  1.50564949e-03 -5.36511902e-01
1.13424276e-01 -9.76209406e-03  5.35544815e-02  1.58440412e-02
-3.78786162e-01]
0.7542932581943915
27.325919068999998

import numpy as np
from sklearn import datasets
from playLA.LinearRegression import LinearRegression
from playLA.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older version.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    reg = LinearRegression()
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    reg.fit_normal(X_train, y_train)
    print(reg.score(X_test, y_test))
    reg2 = LinearRegression()
    # Standardize features so the default eta works for every column.
    standardScaler = StandardScaler()
    standardScaler.fit(X_train)
    X_train_standard = standardScaler.transform(X_train)
    start_time = timeit.default_timer()
    reg2.fit_gd(X_train_standard, y_train)
    print(reg2.coef)
    # Apply the SAME scaler (fit on train only) to the test set.
    X_test_standard = standardScaler.transform(X_test)
    print(reg2.score(X_test_standard, y_test))
    # print(X_train[:10, :])
    print(timeit.default_timer() - start_time)

0.8129794056212907
[-1.04042202  0.83093351 -0.24794356  0.01179456 -1.35034756  2.25074
-0.66384353 -2.53568774  2.25572406 -2.34011572 -1.76565394  0.70923397
-2.72677064]
0.8129873310487505
0.13547030499999996

import numpy as np
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    m = 1000  # number of samples
    n = 5000  # number of features
    # Random normal design matrix.
    big_X = np.random.normal(size=(m, n))
    # True parameters drawn from [0, 100); index 0 is the intercept.
    true_theta = np.random.uniform(0, 100, size=n + 1)
    # Targets with Gaussian noise (mean 0, std 10).
    big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0, 10, size=m)
    # Time the closed-form normal-equation fit.
    big_reg1 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg1.fit_normal(big_X, big_y)
    print(timeit.default_timer() - start_time)
    # Time the batch gradient-descent fit for comparison.
    big_reg2 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg2.fit_gd(big_X, big_y)
    print(timeit.default_timer() - start_time)


1.9443837830000001
3.53725637

import numpy as np
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    m = 1000  # number of samples
    n = 6000  # number of features
    # Random normal design matrix.
    big_X = np.random.normal(size=(m, n))
    # True parameters drawn from [0, 100); index 0 is the intercept.
    true_theta = np.random.uniform(0, 100, size=n + 1)
    # Targets with Gaussian noise (mean 0, std 10).
    big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0, 10, size=m)
    # Time the closed-form normal-equation fit.
    big_reg1 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg1.fit_normal(big_X, big_y)
    print(timeit.default_timer() - start_time)
    # Time the batch gradient-descent fit for comparison.
    big_reg2 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg2.fit_gd(big_X, big_y)
    print(timeit.default_timer() - start_time)


3.018317079
3.375811186
import numpy as np
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    m = 1000  # number of samples
    n = 7000  # number of features
    # Random normal design matrix.
    big_X = np.random.normal(size=(m, n))
    # True parameters drawn from [0, 100); index 0 is the intercept.
    true_theta = np.random.uniform(0, 100, size=n + 1)
    # Targets with Gaussian noise (mean 0, std 10).
    big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0, 10, size=m)
    # Time the closed-form normal-equation fit.
    big_reg1 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg1.fit_normal(big_X, big_y)
    print(timeit.default_timer() - start_time)
    # Time the batch gradient-descent fit for comparison.
    big_reg2 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg2.fit_gd(big_X, big_y)
    print(timeit.default_timer() - start_time)


4.479472085
3.2846188210000005
import numpy as np
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    m = 1000  # number of samples
    n = 8000  # number of features
    # Random normal design matrix.
    big_X = np.random.normal(size=(m, n))
    # True parameters drawn from [0, 100); index 0 is the intercept.
    true_theta = np.random.uniform(0, 100, size=n + 1)
    # Targets with Gaussian noise (mean 0, std 10).
    big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0, 10, size=m)
    # Time the closed-form normal-equation fit.
    big_reg1 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg1.fit_normal(big_X, big_y)
    print(timeit.default_timer() - start_time)
    # Time the batch gradient-descent fit for comparison.
    big_reg2 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg2.fit_gd(big_X, big_y)
    print(timeit.default_timer() - start_time)


6.36739793
2.9523499839999996
import numpy as np
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    m = 1000  # number of samples
    n = 9000  # number of features
    # Random normal design matrix.
    big_X = np.random.normal(size=(m, n))
    # True parameters drawn from [0, 100); index 0 is the intercept.
    true_theta = np.random.uniform(0, 100, size=n + 1)
    # Targets with Gaussian noise (mean 0, std 10).
    big_y = big_X.dot(true_theta[1:]) + true_theta[0] + np.random.normal(0, 10, size=m)
    # Time the closed-form normal-equation fit.
    big_reg1 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg1.fit_normal(big_X, big_y)
    print(timeit.default_timer() - start_time)
    # Time the batch gradient-descent fit for comparison.
    big_reg2 = LinearRegression()
    start_time = timeit.default_timer()
    big_reg2.fit_gd(big_X, big_y)
    print(timeit.default_timer() - start_time)


8.681674721
2.8626002980000003

（随机梯度下降的学习率采用 t0/(t + t1) 的衰减形式：随着迭代次数 i_iters 的增加，学习率越来越小，类似模拟退火的思想）

import numpy as np
import timeit


def J(theta, X_b, y):
    """MSE loss of theta on (X_b, y); returns inf on numeric overflow."""
    try:
        return np.sum((y - X_b.dot(theta))**2) / len(X_b)
    except Exception:
        return float('inf')


def dJ(theta, X_b, y):
    """Vectorized gradient of J: X_b^T (X_b theta - y) * 2 / m."""
    return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)


def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    """Batch gradient descent.

    :param X_b: feature matrix with a leading column of ones
    :param y: target vector
    :param initial_theta: initial parameter vector
    :param eta: learning rate (step size)
    :param n_iters: maximum number of iterations
    :param epsilon: convergence threshold on the loss difference
    :return: fitted parameter vector theta
    """
    theta = initial_theta
    i_iter = 0
    while i_iter < n_iters:
        # FIX: the gradient was referenced but never computed.
        gradient = dJ(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        # Stop when the loss change falls below epsilon.
        if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
            break
        i_iter += 1
    return theta


if __name__ == "__main__":
    m = 1000000
    # One million reproducible samples.
    np.random.seed(666)
    x = np.random.normal(size=m)
    X = x.reshape(-1, 1)
    # y = 4x + 3 with Gaussian noise (std 3).
    y = 4 * x + 3 + np.random.normal(0, 3, size=m)

    start_time = timeit.default_timer()
    X_b = np.hstack([np.ones((len(X), 1)), X])
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
    theta = gradient_descent(X_b, y, initial_theta, eta)
    print(theta)
    print(timeit.default_timer() - start_time)


[3.00375106 4.00219484]
6.453924730000001

import numpy as np
import timeit


def J(theta, X_b, y):
    """MSE loss of theta on (X_b, y); returns inf on numeric overflow."""
    try:
        return np.sum((y - X_b.dot(theta))**2) / len(X_b)
    except Exception:
        return float('inf')


def dJ(theta, X_b, y):
    """Vectorized gradient of J over the whole batch."""
    return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)


def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    """Batch gradient descent.

    :param X_b: feature matrix with a leading column of ones
    :param y: target vector
    :param initial_theta: initial parameter vector
    :param eta: learning rate (step size)
    :param n_iters: maximum number of iterations
    :param epsilon: convergence threshold on the loss difference
    :return: fitted parameter vector theta
    """
    theta = initial_theta
    i_iter = 0
    while i_iter < n_iters:
        # FIX: the gradient was referenced but never computed.
        gradient = dJ(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
            break
        i_iter += 1
    return theta


def dJ_sgd(theta, X_b_i, y_i):
    """Stochastic gradient estimated from a single sample (X_b_i, y_i)."""
    return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2


def sgd(X_b, y, initial_theta, n_iters):
    """Stochastic gradient descent with a decaying learning rate.

    :param X_b: feature matrix with a leading column of ones
    :param y: target vector
    :param initial_theta: initial parameter vector
    :param n_iters: number of single-sample update steps
    :return: fitted parameter vector theta
    """
    t0 = 5
    t1 = 50

    def learning_rate(t):
        # Decaying step size so updates shrink as iterations progress.
        return t0 / (t + t1)

    theta = initial_theta
    for cur_iter in range(n_iters):
        rand_i = np.random.randint(len(X_b))
        # FIX: the stochastic gradient of the sampled instance was never computed.
        gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])
        theta = theta - learning_rate(cur_iter) * gradient
    return theta


if __name__ == "__main__":
    m = 1000000
    # One million reproducible samples; y = 4x + 3 with noise (std 3).
    np.random.seed(666)
    x = np.random.normal(size=m)
    X = x.reshape(-1, 1)
    y = 4 * x + 3 + np.random.normal(0, 3, size=m)

    start_time = timeit.default_timer()
    X_b = np.hstack([np.ones((len(X), 1)), X])
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
    theta = gradient_descent(X_b, y, initial_theta, eta)
    print(theta)
    print(timeit.default_timer() - start_time)

    # SGD needs to see only ~1/3 of the samples to get a comparable fit.
    start_time = timeit.default_timer()
    initial_theta = np.zeros(X_b.shape[1])
    theta = sgd(X_b, y, initial_theta, n_iters=len(X_b) // 3)
    print(theta)
    print(timeit.default_timer() - start_time)

[3.00375106 4.00219484]
6.453924730000001
[3.01543883 3.98431204]
4.022960640000001

scikit-learn中的随机梯度下降法

import numpy as np
from numpy.linalg import inv
from .metrics import r2_score

class LinearRegression:

    def __init__(self):
        # Model state, populated by one of the fit_* methods.
        self.coef = None          # feature coefficients
        self.interception = None  # intercept term
        self._theta = None        # full parameter vector [intercept, coefs...]

    def fit_normal(self, X_train, y_train):
        """Fit by the closed-form normal equation: theta = (X^T X)^-1 X^T y."""
        assert X_train.shape[0] == y_train.shape[0], \
            "X_train的列数必须等于y_train的长度"
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
        self.interception = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Fit by batch gradient descent.

        :param eta: learning rate
        :param n_iters: maximum number of iterations
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "X_train的列数必须等于y_train的长度"

        def J(theta, X_b, y):
            # MSE loss; inf on overflow so divergence is handled gracefully.
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
            except Exception:
                return float('inf')

        def dJ(theta, X_b, y):
            # Vectorized gradient: X_b^T (X_b theta - y) * 2 / m.
            return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
            # Iterate theta <- theta - eta * dJ until the loss stops improving.
            theta = initial_theta
            i_iter = 0
            while i_iter < n_iters:
                # FIX: the gradient was referenced but never computed.
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break
                i_iter += 1
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        self.interception = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def fit_sgd(self, X_train, y_train, n_iters=5, t0=5, t1=50):
        """Fit by stochastic gradient descent.

        :param n_iters: number of full passes (epochs) over the training set
        :param t0: numerator of the decaying learning rate t0 / (t + t1)
        :param t1: offset of the decaying learning rate
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "X_train的列数必须等于y_train的长度"
        assert n_iters >= 1

        def dJ_sgd(theta, X_b_i, y_i):
            # Gradient estimated from a single sample.
            return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2

        def sgd(X_b, y, initial_theta, n_iters, t0=5, t1=50):
            def learning_rate(t):
                # Decaying step size so updates shrink over time.
                return t0 / (t + t1)

            theta = initial_theta
            m = len(X_b)
            # One shuffled pass over all m samples per epoch.
            for cur_iter in range(n_iters):
                indexes = np.random.permutation(m)
                X_b_new = X_b[indexes]
                y_new = y[indexes]
                for i in range(m):
                    # FIX: the stochastic gradient was never computed.
                    gradient = dJ_sgd(theta, X_b_new[i], y_new[i])
                    theta = theta - learning_rate(cur_iter * m + i) * gradient
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.random.randn(X_b.shape[1])
        self._theta = sgd(X_b, y_train, initial_theta, n_iters, t0, t1)
        self.interception = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def predict(self, X_predict):
        # Prepend the bias column and apply the learned theta.
        assert self.interception is not None and self.coef is not None, \
            "开始预测前必须fit"
        assert X_predict.shape[1] == len(self.coef), \
            "预测的特征数必须与训练的特征数相等"
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        # R^2 of the predictions on a held-out test set.
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"
import numpy as np
import timeit
from playLA.LinearRegression import LinearRegression

if __name__ == "__main__":
    m = 1000000
    # One million reproducible samples; y = 4x + 3 with noise (std 3).
    np.random.seed(666)
    x = np.random.normal(size=m)
    X = x.reshape(-1, 1)
    y = 4 * x + 3 + np.random.normal(0, 3, size=m)

    def J(theta, X_b, y):
        # MSE loss; inf on overflow so divergence is handled gracefully.
        try:
            return np.sum((y - X_b.dot(theta))**2) / len(X_b)
        except Exception:
            return float('inf')

    def dJ(theta, X_b, y):
        # Vectorized gradient over the whole batch.
        return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)

    def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
        """Batch gradient descent; stops when the loss change < epsilon."""
        theta = initial_theta
        i_iter = 0
        while i_iter < n_iters:
            # FIX: the gradient was referenced but never computed.
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            theta = theta - eta * gradient
            if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            i_iter += 1
        return theta

    start_time = timeit.default_timer()
    X_b = np.hstack([np.ones((len(X), 1)), X])
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
    theta = gradient_descent(X_b, y, initial_theta, eta)
    print(theta)
    print(timeit.default_timer() - start_time)

    def dJ_sgd(theta, X_b_i, y_i):
        # Stochastic gradient from a single sample.
        return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2

    def sgd(X_b, y, initial_theta, n_iters):
        """Stochastic gradient descent with a t0/(t + t1) decaying step size."""
        t0 = 5
        t1 = 50

        def learning_rate(t):
            return t0 / (t + t1)

        theta = initial_theta
        for cur_iter in range(n_iters):
            rand_i = np.random.randint(len(X_b))
            # FIX: the stochastic gradient was never computed.
            gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])
            theta = theta - learning_rate(cur_iter) * gradient
        return theta

    start_time = timeit.default_timer()
    initial_theta = np.zeros(X_b.shape[1])
    theta = sgd(X_b, y, initial_theta, n_iters=len(X_b) // 3)
    print(theta)
    print(timeit.default_timer() - start_time)

    # Same task via the library implementation of SGD.
    reg = LinearRegression()
    reg.fit_sgd(X, y, n_iters=2)
    print(reg.coef)
    print(reg.interception)

[3.00375106 4.00219484]
5.302007303
[3.01543883 3.98431204]
4.018497287
[4.0004335]
3.0050395227201014

from sklearn import datasets
from playLA.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older version.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    # Standardize so one learning-rate schedule suits every feature.
    standardScaler = StandardScaler()
    standardScaler.fit(X_train)
    X_train_standard = standardScaler.transform(X_train)
    X_test_standard = standardScaler.transform(X_test)
    reg = LinearRegression()
    start_time = timeit.default_timer()
    reg.fit_sgd(X_train_standard, y_train, n_iters=2)
    print(timeit.default_timer() - start_time)
    print(reg.score(X_test_standard, y_test))


0.005665511000000012
0.7857275413602652

from sklearn import datasets
from playLA.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from playLA.LinearRegression import LinearRegression
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older version.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    # Standardize so one learning-rate schedule suits every feature.
    standardScaler = StandardScaler()
    standardScaler.fit(X_train)
    X_train_standard = standardScaler.transform(X_train)
    X_test_standard = standardScaler.transform(X_test)
    reg = LinearRegression()
    start_time = timeit.default_timer()
    # More epochs (50 vs 2) for a better fit at modest extra cost.
    reg.fit_sgd(X_train_standard, y_train, n_iters=50)
    print(timeit.default_timer() - start_time)
    print(reg.score(X_test_standard, y_test))


0.07702159399999997
0.8128901094766997

from sklearn import datasets
from playLA.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from playLA.LinearRegression import LinearRegression
from sklearn.linear_model import SGDRegressor
import timeit

if __name__ == "__main__":
    # FIX: `boston` was used but never loaded.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — pin an older version.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop samples capped at the $50k ceiling.
    X = X[y < 50]
    y = y[y < 50]
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    # Standardize so one learning-rate schedule suits every feature.
    standardScaler = StandardScaler()
    standardScaler.fit(X_train)
    X_train_standard = standardScaler.transform(X_train)
    X_test_standard = standardScaler.transform(X_test)
    # Our implementation.
    reg = LinearRegression()
    start_time = timeit.default_timer()
    reg.fit_sgd(X_train_standard, y_train, n_iters=50)
    print(timeit.default_timer() - start_time)
    print(reg.score(X_test_standard, y_test))
    # scikit-learn's SGDRegressor for comparison.
    sgd_reg = SGDRegressor()
    start_time = timeit.default_timer()
    sgd_reg.fit(X_train_standard, y_train)
    print(timeit.default_timer() - start_time)
    print(sgd_reg.score(X_test_standard, y_test))

0.07724935
0.8128901094766997
0.001499403999999982
0.8130319307073004

# PCA与梯度上升法

1. 一个非监督的机器学习算法
2. 主要用于数据的降维
3. 通过降维，可以发现更便于人类理解的特征
4. 其他应用：可视化；去噪

其中 \bar{x} = \frac{1}{m}\sum_{i=1}^{m} x_i 就是均值，在概率论中称为数学期望。

对样本做 demean（每个特征减去其均值）之后有 \bar{x} = 0，故方差可简化为 Var(x) = \frac{1}{m}\sum_{i=1}^{m} x_i^2。

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # 100 two-dimensional samples: feature 2 is a noisy linear function of
    # feature 1, so the cloud has one clearly dominant direction.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    plt.scatter(X[:, 0], X[:, 1])
    plt.show()

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # 100 two-dimensional samples with one dominant direction.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    # plt.scatter(X[:, 0], X[:, 1])
    # plt.show()

    def demean(X):
        # Shift each feature to zero mean so projection variance is meaningful.
        return X - np.mean(X, axis=0)

    X_demean = demean(X)
    plt.scatter(X_demean[:, 0], X_demean[:, 1])
    plt.show()

    def f(w, X):
        # Objective: variance of the projections of X onto direction w.
        return np.sum((X.dot(w)**2)) / len(X)

    def df_math(w, X):
        # Analytic gradient of f.
        return X.T.dot(X.dot(w)) * 2 / len(X)

    def df_debug(w, X, epsilon=0.0001):
        # Numerical gradient (central differences) to cross-check df_math.
        res = np.empty(len(w))
        for i in range(len(w)):
            w_1 = w.copy()
            w_1[i] += epsilon
            w_2 = w.copy()
            w_2[i] -= epsilon
            res[i] = (f(w_1, X) - f(w_2, X)) / (2 * epsilon)
        return res

    def direction(w):
        # Normalize w to a unit vector — only its direction matters.
        return w / np.linalg.norm(w)

    def gradient_ascent(df, X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
        """Maximize f by gradient ascent, keeping w a unit vector.

        :param df: gradient function of the objective
        :param X: demeaned data matrix
        :param initial_w: initial direction (must not be the zero vector)
        :param eta: learning rate
        :param n_iters: maximum number of iterations
        :param epsilon: convergence threshold on the objective difference
        :return: unit vector w maximizing f
        """
        w = direction(initial_w)
        cur_iter = 0
        while cur_iter < n_iters:
            # FIX: the gradient was referenced but never computed.
            gradient = df(w, X)
            last_w = w
            # Ascent uses '+', unlike gradient descent's '-'.
            w = w + eta * gradient
            # Re-normalize: only the direction of w matters.
            w = direction(w)
            if abs(f(w, X) - f(last_w, X)) < epsilon:
                break
            cur_iter += 1
        return w

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # 100 two-dimensional samples with one dominant direction.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    # plt.scatter(X[:, 0], X[:, 1])
    # plt.show()

    def demean(X):
        # Shift each feature to zero mean so projection variance is meaningful.
        return X - np.mean(X, axis=0)

    X_demean = demean(X)
    # plt.scatter(X_demean[:, 0], X_demean[:, 1])
    # plt.show()

    def f(w, X):
        # Objective: variance of the projections of X onto direction w.
        return np.sum((X.dot(w)**2)) / len(X)

    def df_math(w, X):
        # Analytic gradient of f.
        return X.T.dot(X.dot(w)) * 2 / len(X)

    def df_debug(w, X, epsilon=0.0001):
        # Numerical gradient (central differences) to cross-check df_math.
        res = np.empty(len(w))
        for i in range(len(w)):
            w_1 = w.copy()
            w_1[i] += epsilon
            w_2 = w.copy()
            w_2[i] -= epsilon
            res[i] = (f(w_1, X) - f(w_2, X)) / (2 * epsilon)
        return res

    def direction(w):
        # Normalize w to a unit vector — only its direction matters.
        return w / np.linalg.norm(w)

    def gradient_ascent(df, X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
        """Maximize f by gradient ascent, keeping w a unit vector.

        :param df: gradient function of the objective
        :param X: demeaned data matrix
        :param initial_w: initial direction (must not be the zero vector)
        :param eta: learning rate
        :param n_iters: maximum number of iterations
        :param epsilon: convergence threshold on the objective difference
        :return: unit vector w maximizing f
        """
        w = direction(initial_w)
        cur_iter = 0
        while cur_iter < n_iters:
            # FIX: the gradient was referenced but never computed.
            gradient = df(w, X)
            last_w = w
            # Ascent uses '+', unlike gradient descent's '-'.
            w = w + eta * gradient
            w = direction(w)
            if abs(f(w, X) - f(last_w, X)) < epsilon:
                break
            cur_iter += 1
        return w

    # Must not start from the zero vector: its direction is undefined.
    initial_w = np.random.random(X.shape[1])
    eta = 0.001
    # Do not standardize with StandardScaler: it would erase the variance PCA seeks.
    print(gradient_ascent(df_math, X_demean, initial_w, eta))

[0.76912318 0.63910057]
[0.76912318 0.63910057]

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # 100 two-dimensional samples with one dominant direction.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    # plt.scatter(X[:, 0], X[:, 1])
    # plt.show()

    def demean(X):
        # Shift each feature to zero mean so projection variance is meaningful.
        return X - np.mean(X, axis=0)

    X_demean = demean(X)
    # plt.scatter(X_demean[:, 0], X_demean[:, 1])
    # plt.show()

    def f(w, X):
        # Objective: variance of the projections of X onto direction w.
        return np.sum((X.dot(w)**2)) / len(X)

    def df_math(w, X):
        # Analytic gradient of f.
        return X.T.dot(X.dot(w)) * 2 / len(X)

    def df_debug(w, X, epsilon=0.0001):
        # Numerical gradient (central differences) to cross-check df_math.
        res = np.empty(len(w))
        for i in range(len(w)):
            w_1 = w.copy()
            w_1[i] += epsilon
            w_2 = w.copy()
            w_2[i] -= epsilon
            res[i] = (f(w_1, X) - f(w_2, X)) / (2 * epsilon)
        return res

    def direction(w):
        # Normalize w to a unit vector — only its direction matters.
        return w / np.linalg.norm(w)

    def gradient_ascent(df, X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
        """Maximize f by gradient ascent, keeping w a unit vector.

        :param df: gradient function of the objective
        :param X: demeaned data matrix
        :param initial_w: initial direction (must not be the zero vector)
        :param eta: learning rate
        :param n_iters: maximum number of iterations
        :param epsilon: convergence threshold on the objective difference
        :return: unit vector w maximizing f
        """
        w = direction(initial_w)
        cur_iter = 0
        while cur_iter < n_iters:
            # FIX: the gradient was referenced but never computed.
            gradient = df(w, X)
            last_w = w
            # Ascent uses '+', unlike gradient descent's '-'.
            w = w + eta * gradient
            w = direction(w)
            if abs(f(w, X) - f(last_w, X)) < epsilon:
                break
            cur_iter += 1
        return w

    # Must not start from the zero vector: its direction is undefined.
    initial_w = np.random.random(X.shape[1])
    eta = 0.001
    # Do not standardize with StandardScaler: it would erase the variance PCA seeks.
    w = gradient_ascent(df_math, X_demean, initial_w, eta)
    # Draw the first principal component over the demeaned cloud.
    plt.scatter(X_demean[:, 0], X_demean[:, 1])
    plt.plot([0, w[0] * 100], [0, w[1] * 100], color='r')
    plt.show()

    # Second dataset: exactly linear (no noise), for comparison.
    X2 = np.empty((100, 2))
    X2[:, 0] = np.random.uniform(0, 100, size=100)
    X2[:, 1] = 0.75 * X2[:, 0] + 3
    plt.scatter(X2[:, 0], X2[:, 1])
    plt.show()

import numpy as np
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # 100 two-dimensional samples with one dominant direction.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    # plt.scatter(X[:, 0], X[:, 1])
    # plt.show()

    def demean(X):
        # Shift each feature to zero mean so projection variance is meaningful.
        return X - np.mean(X, axis=0)

    X_demean = demean(X)
    # plt.scatter(X_demean[:, 0], X_demean[:, 1])
    # plt.show()

    def f(w, X):
        # Objective: variance of the projections of X onto direction w.
        return np.sum((X.dot(w)**2)) / len(X)

    def df_math(w, X):
        # Analytic gradient of f.
        return X.T.dot(X.dot(w)) * 2 / len(X)

    def df_debug(w, X, epsilon=0.0001):
        # Numerical gradient (central differences) to cross-check df_math.
        res = np.empty(len(w))
        for i in range(len(w)):
            w_1 = w.copy()
            w_1[i] += epsilon
            w_2 = w.copy()
            w_2[i] -= epsilon
            res[i] = (f(w_1, X) - f(w_2, X)) / (2 * epsilon)
        return res

    def direction(w):
        # Normalize w to a unit vector — only its direction matters.
        return w / np.linalg.norm(w)

    def gradient_ascent(df, X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
        """Maximize f by gradient ascent, keeping w a unit vector.

        :param df: gradient function of the objective
        :param X: demeaned data matrix
        :param initial_w: initial direction (must not be the zero vector)
        :param eta: learning rate
        :param n_iters: maximum number of iterations
        :param epsilon: convergence threshold on the objective difference
        :return: unit vector w maximizing f
        """
        w = direction(initial_w)
        cur_iter = 0
        while cur_iter < n_iters:
            # FIX: the gradient was referenced but never computed.
            gradient = df(w, X)
            last_w = w
            # Ascent uses '+', unlike gradient descent's '-'.
            w = w + eta * gradient
            w = direction(w)
            if abs(f(w, X) - f(last_w, X)) < epsilon:
                break
            cur_iter += 1
        return w

    # Must not start from the zero vector: its direction is undefined.
    initial_w = np.random.random(X.shape[1])
    eta = 0.001
    # Do not standardize with StandardScaler: it would erase the variance PCA seeks.
    w = gradient_ascent(df_math, X_demean, initial_w, eta)
    # plt.scatter(X_demean[:, 0], X_demean[:, 1])
    # plt.plot([0, w[0] * 100], [0, w[1] * 100], color='r')
    # plt.show()

    # Second dataset: exactly linear (no noise) — its first component
    # should align exactly with the line y = 0.75x.
    X2 = np.empty((100, 2))
    X2[:, 0] = np.random.uniform(0, 100, size=100)
    X2[:, 1] = 0.75 * X2[:, 0] + 3
    # plt.scatter(X2[:, 0], X2[:, 1])
    # plt.show()
    X2_demean = demean(X2)
    w2 = gradient_ascent(df_math, X2_demean, initial_w, eta)
    plt.scatter(X2_demean[:, 0], X2_demean[:, 1])
    plt.plot([0, w2[0] * 100], [0, w2[1] * 100], color='r')
    plt.show()

def direction(w):
    """Normalize w to a unit vector, preserving only its direction."""
    magnitude = np.linalg.norm(w)
    return w / magnitude

import numpy as np
import matplotlib.pyplot as plt

def demean(X):
    """Center every feature (column) of X on zero."""
    return X - np.mean(X, axis=0)


def f(w, X):
    """Objective: mean squared projection of the rows of X onto direction w."""
    return np.sum((X.dot(w) ** 2)) / len(X)


def df(w, X):
    """Analytic gradient of f with respect to w."""
    return X.T.dot(X.dot(w)) * 2 / len(X)


def direction(w):
    """Rescale w to unit length; only its direction matters."""
    return w / np.linalg.norm(w)


def first_component(X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
    """Find the first principal component of (demeaned) X by gradient ascent.

    :param X: data matrix, assumed already demeaned
    :param initial_w: starting vector; must not be the zero vector
    :param eta: learning rate (step size)
    :param n_iters: maximum number of iterations
    :param epsilon: convergence tolerance on the objective value
    :return: unit vector along the first principal component
    """
    w = direction(initial_w)
    cur_iter = 0
    while cur_iter < n_iters:
        # BUG FIX: the gradient was referenced below but never computed.
        gradient = df(w, X)
        last_w = w
        # '+' because this is gradient *ascent* (descent would use '-').
        w = w + eta * gradient
        # Re-normalize: only the direction of w matters.
        w = direction(w)
        # Stop once the objective stops improving measurably.
        if abs(f(w, X) - f(last_w, X)) < epsilon:
            break
        cur_iter += 1
    return w


if __name__ == "__main__":
    # Synthetic noisy linear dataset; demean before the component search.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    X = demean(X)

    # Must not start from the zero vector; do not standardize the data
    # with StandardScaler (it would flatten the variance PCA measures).
    initial_w = np.random.random(X.shape[1])
    eta = 0.01
    w = first_component(X, initial_w, eta)


import numpy as np
import matplotlib.pyplot as plt

def demean(X):
    """Center every feature (column) of X on zero."""
    return X - np.mean(X, axis=0)


def f(w, X):
    """Objective: mean squared projection of the rows of X onto direction w."""
    return np.sum((X.dot(w) ** 2)) / len(X)


def df(w, X):
    """Analytic gradient of f with respect to w."""
    return X.T.dot(X.dot(w)) * 2 / len(X)


def direction(w):
    """Rescale w to unit length; only its direction matters."""
    return w / np.linalg.norm(w)


def first_component(X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
    """Find the first principal component of (demeaned) X by gradient ascent.

    :param X: data matrix, assumed already demeaned
    :param initial_w: starting vector; must not be the zero vector
    :param eta: learning rate (step size)
    :param n_iters: maximum number of iterations
    :param epsilon: convergence tolerance on the objective value
    :return: unit vector along the first principal component
    """
    w = direction(initial_w)
    cur_iter = 0
    while cur_iter < n_iters:
        # BUG FIX: the gradient was referenced below but never computed.
        gradient = df(w, X)
        last_w = w
        # '+' because this is gradient *ascent* (descent would use '-').
        w = w + eta * gradient
        # Re-normalize: only the direction of w matters.
        w = direction(w)
        # Stop once the objective stops improving measurably.
        if abs(f(w, X) - f(last_w, X)) < epsilon:
            break
        cur_iter += 1
    return w


if __name__ == "__main__":
    # Synthetic noisy linear dataset; demean before the component search.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    X = demean(X)

    # Must not start from the zero vector; do not standardize with StandardScaler.
    initial_w = np.random.random(X.shape[1])
    eta = 0.01
    w = first_component(X, initial_w, eta)

    # Deflate: remove each sample's projection onto the first component.
    # What remains lives in the subspace of the second component.
    X2 = X - X.dot(w).reshape(-1, 1) * w
    plt.scatter(X2[:, 0], X2[:, 1])
    plt.show()

import numpy as np
import matplotlib.pyplot as plt

def demean(X):
    """Center every feature (column) of X on zero."""
    return X - np.mean(X, axis=0)


def f(w, X):
    """Objective: mean squared projection of the rows of X onto direction w."""
    return np.sum((X.dot(w) ** 2)) / len(X)


def df(w, X):
    """Analytic gradient of f with respect to w."""
    return X.T.dot(X.dot(w)) * 2 / len(X)


def direction(w):
    """Rescale w to unit length; only its direction matters."""
    return w / np.linalg.norm(w)


def first_component(X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
    """Find the first principal component of (demeaned) X by gradient ascent.

    :param X: data matrix, assumed already demeaned
    :param initial_w: starting vector; must not be the zero vector
    :param eta: learning rate (step size)
    :param n_iters: maximum number of iterations
    :param epsilon: convergence tolerance on the objective value
    :return: unit vector along the first principal component
    """
    w = direction(initial_w)
    cur_iter = 0
    while cur_iter < n_iters:
        # BUG FIX: the gradient was referenced below but never computed.
        gradient = df(w, X)
        last_w = w
        # '+' because this is gradient *ascent* (descent would use '-').
        w = w + eta * gradient
        # Re-normalize: only the direction of w matters.
        w = direction(w)
        # Stop once the objective stops improving measurably.
        if abs(f(w, X) - f(last_w, X)) < epsilon:
            break
        cur_iter += 1
    return w


if __name__ == "__main__":
    # Synthetic noisy linear dataset; demean before the component search.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    X = demean(X)

    # Must not start from the zero vector; do not standardize with StandardScaler.
    initial_w = np.random.random(X.shape[1])
    eta = 0.01
    w = first_component(X, initial_w, eta)

    # Deflate and find the second component in the residual subspace.
    X2 = X - X.dot(w).reshape(-1, 1) * w
    w2 = first_component(X2, initial_w, eta)
    # Successive components should be orthogonal: this dot product is ~0.
    print(w.dot(w2))

2.6676799232960846e-06

import numpy as np
import matplotlib.pyplot as plt

def demean(X):
    """Center every feature (column) of X on zero."""
    return X - np.mean(X, axis=0)


def f(w, X):
    """Objective: mean squared projection of the rows of X onto direction w."""
    return np.sum((X.dot(w) ** 2)) / len(X)


def df(w, X):
    """Analytic gradient of f with respect to w."""
    return X.T.dot(X.dot(w)) * 2 / len(X)


def direction(w):
    """Rescale w to unit length; only its direction matters."""
    return w / np.linalg.norm(w)


def first_component(X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
    """Find the first principal component of (demeaned) X by gradient ascent.

    :param X: data matrix, assumed already demeaned
    :param initial_w: starting vector; must not be the zero vector
    :param eta: learning rate (step size)
    :param n_iters: maximum number of iterations
    :param epsilon: convergence tolerance on the objective value
    :return: unit vector along the first principal component
    """
    w = direction(initial_w)
    cur_iter = 0
    while cur_iter < n_iters:
        # BUG FIX: the gradient was referenced below but never computed.
        gradient = df(w, X)
        last_w = w
        # '+' because this is gradient *ascent* (descent would use '-').
        w = w + eta * gradient
        # Re-normalize: only the direction of w matters.
        w = direction(w)
        # Stop once the objective stops improving measurably.
        if abs(f(w, X) - f(last_w, X)) < epsilon:
            break
        cur_iter += 1
    return w


def first_n_components(n, X, eta=0.01, n_iters=1e4, epsilon=1e-8):
    """Extract the first n principal components of X by repeated deflation.

    :param n: number of components to extract
    :param X: data matrix (demeaned internally; X itself is not modified)
    :param eta: learning rate for the inner gradient ascent
    :param n_iters: maximum iterations per component
    :param epsilon: convergence tolerance per component
    :return: list of n unit vectors, one per component
    """
    X_pca = demean(X.copy())
    res = []
    for _ in range(n):
        # Random non-zero start for each component.
        initial_w = np.random.random(X_pca.shape[1])
        # FIX: forward n_iters/epsilon (they were silently ignored before).
        w = first_component(X_pca, initial_w, eta, n_iters, epsilon)
        res.append(w)
        # Deflate: remove the found component from every sample.
        X_pca = X_pca - X_pca.dot(w).reshape(-1, 1) * w
    return res


if __name__ == "__main__":
    # Synthetic noisy linear dataset; demean before the component search.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)
    X = demean(X)

    # Must not start from the zero vector; do not standardize with StandardScaler.
    initial_w = np.random.random(X.shape[1])
    eta = 0.01
    w = first_component(X, initial_w, eta)
    X2 = X - X.dot(w).reshape(-1, 1) * w
    w2 = first_component(X2, initial_w, eta)

    print(first_n_components(2, X))

[array([0.79120886, 0.61154603]), array([-0.61154382,  0.79121057])]

k就是新数据的维度，样本数依然为m个。当然，如果我们想把低维数据恢复成高维数据，也可以通过逆变换（inverse transform）把k维数据映射回原来的n维空间，只是降维时丢失的信息无法找回。

import numpy as np

class PCA:
    """Principal-component analysis via gradient ascent on projection variance."""

    def __init__(self, n_components):
        """
        Initialize PCA.
        :param n_components: number of principal components to extract (>= 1)
        """
        assert n_components >= 1, "n_components必须有效"
        self._n_components = n_components
        # After fit(), row i holds the i-th principal-component unit vector.
        self.components_ = None

    def fit(self, X, eta=0.01, n_iters=1e4):
        """Compute the first n_components principal components of X.

        :param X: data matrix, shape (n_samples, n_features)
        :param eta: learning rate for the inner gradient ascent
        :param n_iters: maximum iterations per component
        :return: self
        """
        assert self._n_components <= X.shape[1], \
            "n_components必须小于等于X的特征数"

        def demean(X):
            # Center every feature (column) on zero.
            return X - np.mean(X, axis=0)

        def f(w, X):
            # Objective: mean squared projection of X's rows onto w.
            return np.sum((X.dot(w) ** 2)) / len(X)

        def df(w, X):
            # Analytic gradient of f with respect to w.
            return X.T.dot(X.dot(w)) * 2 / len(X)

        def direction(w):
            # Unit vector along w; only the direction matters.
            return w / np.linalg.norm(w)

        def first_component(X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
            # Gradient ascent for the direction maximizing f(w, X).
            w = direction(initial_w)
            cur_iter = 0
            while cur_iter < n_iters:
                # BUG FIX: the gradient was referenced but never computed.
                gradient = df(w, X)
                last_w = w
                # '+' because this is gradient *ascent*; then re-normalize.
                w = direction(w + eta * gradient)
                # Stop once the objective stops improving measurably.
                if abs(f(w, X) - f(last_w, X)) < epsilon:
                    break
                cur_iter += 1
            return w

        X_pca = demean(X)
        self.components_ = np.empty(shape=(self._n_components, X.shape[1]))
        for i in range(self._n_components):
            # Random non-zero start; deflate X_pca after each component.
            initial_w = np.random.random(X_pca.shape[1])
            w = first_component(X_pca, initial_w, eta, n_iters)
            self.components_[i, :] = w
            X_pca = X_pca - X_pca.dot(w).reshape(-1, 1) * w

        return self

    def transform(self, X):
        """Project X onto the fitted components (n features -> k components)."""
        assert X.shape[1] == self.components_.shape[1]
        return X.dot(self.components_.T)

    def inverse_transform(self, X):
        """Map k-dimensional data back into the original n-dimensional space."""
        assert X.shape[1] == self.components_.shape[0]
        return X.dot(self.components_)

    def __repr__(self):
        return "PCA(n_components=%d)" % self._n_components


import numpy as np
import matplotlib.pyplot as plt
from playLA.PCA import PCA

if __name__ == "__main__":

    # Synthetic noisy linear dataset.
    X = np.empty((100, 2))
    X[:, 0] = np.random.uniform(0, 100, size=100)
    X[:, 1] = 0.75 * X[:, 0] + 3 + np.random.normal(0, 10, size=100)

    # Fit with both components and inspect their directions.
    pca = PCA(n_components=2)
    pca.fit(X)
    print(pca.components_)

    # Reduce to one dimension, then map back into the original space.
    pca = PCA(n_components=1)
    pca.fit(X)
    X_reduction = pca.transform(X)
    print(X_reduction.shape)
    X_restore = pca.inverse_transform(X_reduction)
    print(X_restore.shape)

    # Original samples (blue) versus their reconstructions (red).
    plt.scatter(X[:, 0], X[:, 1], color='b', alpha=0.5)
    plt.scatter(X_restore[:, 0], X_restore[:, 1], color='r', alpha=0.5)
    plt.show()


[[ 0.77885098  0.62720901]
[-0.62720655  0.77885297]]
(100, 1)
(100, 2)

0
0 收藏

0 评论
0 收藏
0