AllenOR灵感

隐藏层

• 隐藏层设计
• 非线性激活函数
• BP算法

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import colorConverter, ListedColormap
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
定义数据集

# Define and generate the samples
nb_of_samples_per_class = 20  # The number of sample in each class
blue_mean = [0]  # The mean of the blue class
red_left_mean = [-2]  # The mean of the red class
red_right_mean = [2]  # The mean of the red class

std_dev = 0.5  # standard deviation of both classes
# Generate samples from both classes
x_blue = np.random.randn(nb_of_samples_per_class, 1) * std_dev + blue_mean
x_red_left = np.random.randn(nb_of_samples_per_class/2, 1) * std_dev + red_left_mean
x_red_right = np.random.randn(nb_of_samples_per_class/2, 1) * std_dev + red_right_mean

# Merge samples in set of input variables x, and corresponding set of
# output variables t
x = np.vstack((x_blue, x_red_left, x_red_right))
t = np.vstack((np.ones((x_blue.shape[0],1)),
np.zeros((x_red_left.shape[0],1)),
np.zeros((x_red_right.shape[0], 1))))
# Plot samples from both classes as lines on a 1D space
plt.figure(figsize=(8,0.5))
plt.xlim(-3,3)
plt.ylim(-1,1)
# Plot samples
plt.plot(x_blue, np.zeros_like(x_blue), 'b|', ms = 30)
plt.plot(x_red_left, np.zeros_like(x_red_left), 'r|', ms = 30)
plt.plot(x_red_right, np.zeros_like(x_red_right), 'r|', ms = 30)
plt.gca().axes.get_yaxis().set_visible(False)
plt.title('Input samples from the blue and red class')
plt.xlabel('$x$', fontsize=15)
plt.show()

非线性激活函数

RBF函数

RBF函数的导数为定义为：

RBF函数的导数
# Define the rbf function
def rbf(z):
return np.exp(-z**2)
# Plot the rbf function
z = np.linspace(-6,6,100)
plt.plot(z, rbf(z), 'b-')
plt.xlabel('$z$', fontsize=15)
plt.ylabel('$e^{-z^2}$', fontsize=15)
plt.title('RBF function')
plt.grid()
plt.show()

RBF函数可视化

BP算法

1. 正向传播去计算神经网络的输出。
2. 利用神经网络得出的结果和真实结果之间的误差进行反向传播来更新神经网络的参数。

1. 正向传播

计算输出结果的激活函数

# Define the logistic function
def logistic(z):
return 1 / (1 + np.exp(-z))

# Function to compute the hidden activations
def hidden_activations(x, wh):
return rbf(x * wh)

# Define output layer feedforward
def output_activations(h , wo):
return logistic(h * wo - 1)

# Define the neural network function
def nn(x, wh, wo):
return output_activations(hidden_activations(x, wh), wo)

# Define the neural network prediction function that only returns
#  1 or 0 depending on the predicted class
def nn_predict(x, wh, wo):
return np.around(nn(x, wh, wo))

2. 反向传播

计算损失函数

# Define the cost function
def cost(y, t):
return - np.sum(np.multiply(t, np.log(y)) + np.multiply((1-t), np.log(1-y)))

# Define a function to calculate the cost for a given set of parameters
def cost_for_param(x, wh, wo, t):
return cost(nn(x, wh, wo) , t)
# Plot the cost in function of the weights
# Define a vector of weights for which we want to plot the cost
nb_of_ws = 200 # compute the cost nb_of_ws times in each dimension
wsh = np.linspace(-10, 10, num=nb_of_ws) # hidden weights
wso = np.linspace(-10, 10, num=nb_of_ws) # output weights
ws_x, ws_y = np.meshgrid(wsh, wso) # generate grid
cost_ws = np.zeros((nb_of_ws, nb_of_ws)) # initialize cost matrix
# Fill the cost matrix for each combination of weights
for i in range(nb_of_ws):
for j in range(nb_of_ws):
cost_ws[i,j] = cost(nn(x, ws_x[i,j], ws_y[i,j]) , t)
# Plot the cost function surface
fig = plt.figure()
ax = Axes3D(fig)
# plot the surface
surf = ax.plot_surface(ws_x, ws_y, cost_ws, linewidth=0, cmap=cm.pink)
ax.view_init(elev=60, azim=-30)
cbar = fig.colorbar(surf)
ax.set_xlabel('$w_h$', fontsize=15)
ax.set_ylabel('$w_o$', fontsize=15)
ax.set_zlabel('$\\xi$', fontsize=15)
cbar.ax.set_ylabel('$\\xi$', fontsize=15)
plt.title('Cost function surface')
plt.grid()
plt.show()

输出层更新

∂ξi/∂wo是每个样本i的输出梯度，参照第二部分教程的方法，我们可以得出相应的推导公式：

gradient_output(y, t)函数实现了δogradient_weight_out(h, grad_output)函数实现了∂ξ/∂wo

隐藏层更新

∂ξi/∂wh是每个样本i在影藏层的梯度，具体计算如下：

∂ξi/∂zhi=δhi表示误差对于隐藏层输入的梯度。这个误差也可以解释为，zhi对于最后误差的贡献。那么，接下来我们定义一下这个误差梯度δhi

gradient_hidden(wo, grad_output)函数实现了δh
gradient_weight_hidden(x, zh, h, grad_hidden)函数实现了∂ξ/∂wh
backprop_update(x, t, wh, wo, learning_rate)函数实现了BP算法的每次迭代过程。

# Define the error function
return y - t

# Define the gradient function for the weight parameter at the output layer

# Define the gradient function for the hidden layer

# Define the gradient function for the weight parameter at the hidden layer
return x * -2 * zh * h * grad_hidden

# Define the update function to update the network parameters over 1 iteration
def backprop_update(x, t, wh, wo, learning_rate):
# Compute the output of the network
# This can be done with y = nn(x, wh, wo), but we need the intermediate
#  h and zh for the weight updates.
zh = x * wh
h = rbf(zh)  # hidden_activations(x, wh)
y = output_activations(h, wo)
# Compute the gradient at the output
# Get the delta for wo
# Compute the gradient at the hidden layer
# Get the delta for wh
# return the update parameters
return (wh-d_wh.sum(), wo-d_wo.sum())
BP算法更新

# Run backpropagation
# Set the initial weight parameter
wh = 2
wo = -5
# Set the learning rate
learning_rate = 0.2

lr_update = learning_rate / nb_of_iterations # learning rate update rule
w_cost_iter = [(wh, wo, cost_for_param(x, wh, wo, t))]  # List to store the weight values over the iterations
for i in range(nb_of_iterations):
learning_rate -= lr_update # decrease the learning rate
# Update the weights via backpropagation
wh, wo = backprop_update(x, t, wh, wo, learning_rate)
w_cost_iter.append((wh, wo, cost_for_param(x, wh, wo, t)))  # Store the values for plotting

# Print the final cost
print('final cost is {:.2f} for weights wh: {:.2f} and wo: {:.2f}'.format(cost_for_param(x, wh, wo, t), wh, wo))

final cost is 10.81 for weights wh: 1.20 and wo: 5.56

# Plot the weight updates on the error surface
# Plot the error surface
fig = plt.figure()
ax = Axes3D(fig)
surf = ax.plot_surface(ws_x, ws_y, cost_ws, linewidth=0, cmap=cm.pink)
ax.view_init(elev=60, azim=-30)
cbar = fig.colorbar(surf)
cbar.ax.set_ylabel('$\\xi$', fontsize=15)

for i in range(1, len(w_cost_iter)):
wh1, wo1, c1 = w_cost_iter[i-1]
wh2, wo2, c2 = w_cost_iter[i]
# Plot the weight-cost value and the line that represents the update
ax.plot([wh1], [wo1], [c1], 'w+')  # Plot the weight cost value
ax.plot([wh1, wh2], [wo1, wo2], [c1, c2], 'w-')
# Plot the last weights
wh1, wo1, c1 = w_cost_iter[len(w_cost_iter)-1]
ax.plot([wh1], [wo1], c1, 'w+')
# Shoz figure
ax.set_xlabel('$w_h$', fontsize=15)
ax.set_ylabel('$w_o$', fontsize=15)
ax.set_zlabel('$\\xi$', fontsize=15)
plt.grid()
plt.show()

分类结果的可视化

# Plot the resulting decision boundary
# Generate a grid over the input space to plot the color of the
#  classification at that grid point
nb_of_xs = 100
xs = np.linspace(-3, 3, num=nb_of_xs)
ys = np.linspace(-1, 1, num=nb_of_xs)
xx, yy = np.meshgrid(xs, ys) # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
for j in range(nb_of_xs):
classification_plane[i,j] = nn_predict(xx[i,j], wh, wo)
# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
colorConverter.to_rgba('r', alpha=0.25),
colorConverter.to_rgba('b', alpha=0.25)])

# Plot the classification plane with decision boundary and input samples
plt.figure(figsize=(8,0.5))
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.xlim(-3,3)
plt.ylim(-1,1)
# Plot samples from both classes as lines on a 1D space
plt.plot(x_blue, np.zeros_like(x_blue), 'b|', ms = 30)
plt.plot(x_red_left, np.zeros_like(x_red_left), 'r|', ms = 30)
plt.plot(x_red_right, np.zeros_like(x_red_right), 'r|', ms = 30)
plt.gca().axes.get_yaxis().set_visible(False)
plt.title('Input samples and their classification')
plt.xlabel('x')
plt.show()

输入域的转换

# Plot projected samples from both classes as lines on a 1D space
plt.figure(figsize=(8,0.5))
plt.xlim(-0.01,1)
plt.ylim(-1,1)
# Plot projected samples
plt.plot(hidden_activations(x_blue, wh), np.zeros_like(x_blue), 'b|', ms = 30)
plt.plot(hidden_activations(x_red_left, wh), np.zeros_like(x_red_left), 'r|', ms = 30)
plt.plot(hidden_activations(x_red_right, wh), np.zeros_like(x_red_right), 'r|', ms = 30)
plt.gca().axes.get_yaxis().set_visible(False)
plt.title('Projection of the input samples by the hidden layer.')
plt.xlabel('h')
plt.show()

RBF函数产生图

AllenOR灵感

Tensorflow 入门学习5 建立一个隐藏层的神经网络实现回归分析

09/18
0
0
python在线神经网络实现手写字符识别系统

oxuzhenyi
2017/02/11
0
0

Intelligent Software Development
2016/11/10
0
0
Tensorflow 入门学习14.TensorFlow实现LeNet实例

10/02
0
0

【方向】
2017/12/02
0
0

lionets
12分钟前
0
0
SpringSecurity404需要注意的地方

15分钟前
0
0
10分钟读懂阿里巴巴高级专家在Flutter Live2018的分享

16分钟前
1
0
RxJava window操作符

woshixin
23分钟前
1
0
05.Beetl标签函数以及定界符、占位符介绍---《Beetl视频课程》

Gavin-King
24分钟前
1
0