AllenOR灵感

# 矢量化

• 矢量的反向传播
• 梯度检查
• 动量

import numpy as np
import sklearn.datasets
import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter, ListedColormap
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# 定义数据集

# Generate the dataset
X, t = sklearn.datasets.make_circles(n_samples=100, shuffle=False, factor=0.3, noise=0.1)
T = np.zeros((100,2)) # Define target matrix
T[t==1,1] = 1
T[t==0,0] = 1
# Separate the red and blue points for plotting
x_red = X[t==0]
x_blue = X[t==1]

print('shape of X: {}'.format(X.shape))
print('shape of T: {}'.format(T.shape))

shape of X: (100, 2)
shape of T: (100, 2)

# Plot both classes on the x1, x2 plane
plt.plot(x_red[:,0], x_red[:,1], 'ro', label='class red')
plt.plot(x_blue[:,0], x_blue[:,1], 'bo', label='class blue')
plt.grid()
plt.legend(loc=1)
plt.xlabel('$x_1$', fontsize=15)
plt.ylabel('$x_2$', fontsize=15)
plt.axis([-1.5, 1.5, -1.5, 1.5])
plt.title('red vs blue classes in the input space')
plt.show()

# 矢量的反向传播

### 1. 矢量的正向传播

###### 计算输出层的激活结果

# Define the logistic function
def logistic(z):
return 1 / (1 + np.exp(-z))

# Define the softmax function
def softmax(z):
return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)

# Function to compute the hidden activations
def hidden_activations(X, Wh, bh):
return logistic(X.dot(Wh) + bh)

# Define output layer feedforward
def output_activations(H, Wo, bo):
return softmax(H.dot(Wo) + bo)

# Define the neural network function
def nn(X, Wh, bh, Wo, bo):
return output_activations(hidden_activations(X, Wh, bh), Wo, bo)

# Define the neural network prediction function that only returns
#  1 or 0 depending on the predicted class
def nn_predict(X, Wh, bh, Wo, bo):
return np.around(nn(X, Wh, bh, Wo, bo))

### 2. 矢量的反向传播

###### 更新输出层的偏差项

# Define the cost function
def cost(Y, T):
return - np.multiply(T, np.log(Y)).sum()

# Define the error function at the output
def error_output(Y, T):
return Y - T

# Define the gradient function for the weight parameters at the output layer
return  H.T.dot(Eo)

# Define the gradient function for the bias parameters at the output layer
return  np.sum(Eo, axis=0, keepdims=True)

###### 更新隐藏层的权重

N个样本中，隐藏层的梯度∂ξ/∂whj可以被如下计算：

###### 更新隐藏层的偏差项

# Define the error function at the hidden layer
def error_hidden(H, Wo, Eo):
# H * (1-H) * (E . Wo^T)
return np.multiply(np.multiply(H,(1 - H)), Eo.dot(Wo.T))

# Define the gradient function for the weight parameters at the hidden layer
return X.T.dot(Eh)

# Define the gradient function for the bias parameters at the output layer
return  np.sum(Eh, axis=0, keepdims=True)

# 梯度检查

# Initialize weights and biases
init_var = 1
# Initialize hidden layer parameters
bh = np.random.randn(1, 3) * init_var
Wh = np.random.randn(2, 3) * init_var
# Initialize output layer parameters
bo = np.random.randn(1, 2) * init_var
Wo = np.random.randn(3, 2) * init_var

# Compute the gradients by backpropagation
# Compute the activations of the layers
H = hidden_activations(X, Wh, bh)
Y = output_activations(H, Wo, bo)
# Compute the gradients of the output layer
Eo = error_output(Y, T)
# Compute the gradients of the hidden layer
Eh = error_hidden(H, Wo, Eo)
Jbh = gradient_bias_hidden(Eh)
# Combine all parameter matrices in a list
params = [Wh, bh, Wo, bo]
# Combine all parameter gradients in a list
grad_params = [JWh, Jbh, JWo, Jbo]

# Set the small change to compute the numerical gradient
eps = 0.0001

# Check each parameter matrix
for p_idx in range(len(params)):
# Check each parameter in each parameter matrix
for row in range(params[p_idx].shape[0]):
for col in range(params[p_idx].shape[1]):
# Copy the parameter matrix and change the current parameter slightly
p_matrix_min = params[p_idx].copy()
p_matrix_min[row,col] -= eps
p_matrix_plus = params[p_idx].copy()
p_matrix_plus[row,col] += eps
# Copy the parameter list, and change the updated parameter matrix
params_min = params[:]
params_min[p_idx] = p_matrix_min
params_plus = params[:]
params_plus[p_idx] =  p_matrix_plus
grad_num = (cost(nn(X, *params_plus), T)-cost(nn(X, *params_min), T))/(2*eps)
# Raise error if the numerical grade is not close to the backprop gradient
print('No gradient errors found')

# 动量反向传播的更新

# Define the update function to update the network parameters over 1 iteration
def backprop_gradients(X, T, Wh, bh, Wo, bo):
# Compute the output of the network
# Compute the activations of the layers
H = hidden_activations(X, Wh, bh)
Y = output_activations(H, Wo, bo)
# Compute the gradients of the output layer
Eo = error_output(Y, T)
# Compute the gradients of the hidden layer
Eh = error_hidden(H, Wo, Eo)
return [JWh, Jbh, JWo, Jbo]

def update_velocity(X, T, ls_of_params, Vs, momentum_term, learning_rate):
# ls_of_params = [Wh, bh, Wo, bo]
# Js = [JWh, Jbh, JWo, Jbo]
return [momentum_term * V - learning_rate * J for V,J in zip(Vs, Js)]

def update_params(ls_of_params, Vs):
# ls_of_params = [Wh, bh, Wo, bo]
# Vs = [VWh, Vbh, VWo, Vbo]
return [P + V for P,V in zip(ls_of_params, Vs)]
# Run backpropagation
# Initialize weights and biases
init_var = 0.1
# Initialize hidden layer parameters
bh = np.random.randn(1, 3) * init_var
Wh = np.random.randn(2, 3) * init_var
# Initialize output layer parameters
bo = np.random.randn(1, 2) * init_var
Wo = np.random.randn(3, 2) * init_var
# Set the learning rate
learning_rate = 0.02
momentum_term = 0.9

# define the velocities Vs = [VWh, Vbh, VWo, Vbo]
Vs = [np.zeros_like(M) for M in [Wh, bh, Wo, bo]]

lr_update = learning_rate / nb_of_iterations # learning rate update rule
ls_costs = [cost(nn(X, Wh, bh, Wo, bo), T)]  # list of cost over the iterations
for i in range(nb_of_iterations):
# Update the velocities and the parameters
Vs = update_velocity(X, T, [Wh, bh, Wo, bo], Vs, momentum_term, learning_rate)
Wh, bh, Wo, bo = update_params([Wh, bh, Wo, bo], Vs)
ls_costs.append(cost(nn(X, Wh, bh, Wo, bo), T))
# Plot the cost over the iterations
plt.plot(ls_costs, 'b-')
plt.xlabel('iteration')
plt.ylabel('$\\xi$', fontsize=15)
plt.title('Decrease of cost over backprop iteration')
plt.grid()
plt.show()

Decrease of cost over backprop iteration

# 可视化训练分类结果

# Plot the resulting decision boundary
# Generate a grid over the input space to plot the color of the
#  classification at that grid point
nb_of_xs = 200
xs1 = np.linspace(-2, 2, num=nb_of_xs)
xs2 = np.linspace(-2, 2, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
for j in range(nb_of_xs):
pred = nn_predict(np.asmatrix([xx[i,j], yy[i,j]]), Wh, bh, Wo, bo)
classification_plane[i,j] = pred[0,0]
# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
colorConverter.to_rgba('b', alpha=0.30),
colorConverter.to_rgba('r', alpha=0.30)])

# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
# Plot both classes on the x1, x2 plane
plt.plot(x_red[:,0], x_red[:,1], 'ro', label='class red')
plt.plot(x_blue[:,0], x_blue[:,1], 'bo', label='class blue')
plt.grid()
plt.legend(loc=1)
plt.xlabel('$x_1$', fontsize=15)
plt.ylabel('$x_2$', fontsize=15)
plt.axis([-1.5, 1.5, -1.5, 1.5])
plt.title('red vs blue classification boundary')
plt.show()

red vs blue classification boundary

# 输入域的转换

# Plot the projection of the input onto the hidden layer

# Define the projections of the blue and red classes
H_blue = hidden_activations(x_blue, Wh, bh)
H_red = hidden_activations(x_red, Wh, bh)
# Plot the error surface
fig = plt.figure()
ax = Axes3D(fig)
ax.plot(np.ravel(H_blue[:,0]), np.ravel(H_blue[:,1]), np.ravel(H_blue[:,2]), 'bo')
ax.plot(np.ravel(H_red[:,0]), np.ravel(H_red[:,1]), np.ravel(H_red[:,2]), 'ro')
ax.set_xlabel('$h_1$', fontsize=15)
ax.set_ylabel('$h_2$', fontsize=15)
ax.set_zlabel('$h_3$', fontsize=15)
ax.view_init(elev=10, azim=-40)
plt.title('Projection of the input X onto the hidden layer H')
plt.grid()
plt.show()

Projection of the input X onto the hidden layer H

### AllenOR灵感

Softmax分类函数 这篇教程是翻译Peter Roelants写的神经网络教程，作者已经授权翻译，这是原文。 该教程将介绍如何入门神经网络，一共包含五部分。你可以在以下链接找到完整内容。 （一）神经...

qq_37634812
2017/12/07
0
0

2017/07/13
0
0

选自machinelearningmastery 　　作者：Jason Brownlee 　　机器之心编译 　　参与：张倩、刘晓坤 　　 　　本文介绍了 10 个常见机器学习案例，这些案例需要用线性代数才能得到最好的理...

04/30
0
0
Pandas系列4-数据矢量化

geekpy
06/21
0
0
RBF网络——核心思想：把向量从低维m映射到高维P，低维线性不可分的情况到高维就线性可分了

RBF网络能够逼近任意的非线性函数，可以处理系统内的难以解析的规律性，具有良好的泛化能力，并有很快的学习收敛速度，已成功应用于非线性函数逼近、时间序列分析、数据分类、模式识别、信息...

2017/11/09
0
0

Windows 环境下安装 Oracle JDK

honeymose
12分钟前
0
0

4
0

go4it

3
0

em_aaron

5
0
CentOS7+git+github创建Python开发环境

1.准备CentOS7 (1)下载VMware Workstation https://pan.baidu.com/s/1miFU8mk (2)下载CentOS7镜像 https://mirrors.aliyun.com/centos/ (3)安装CentOS7系统 http://blog.51cto.com/fengyuns......

4
0