（五）神经网络入门之构建多层网络
（五）神经网络入门之构建多层网络
AllenOR灵感 发表于6个月前
（五）神经网络入门之构建多层网络
• 发表于 6个月前
• 阅读 0
• 收藏 0
• 评论 0

# 多层网络的推广

• 多层网络的泛化
• 随机梯度下降的最小批处理分析

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, cross_validation, metrics
from matplotlib.colors import colorConverter, ListedColormap
import itertools
import collections

# 手写数字集

• 一个训练集，用于模型的训练。（输入数据：X_train，目标数据：T_train）
• 一个验证数据集，用于评估模型的性能；如果模型在训练数据集上出现过拟合，那么可以终止训练。（输入数据：X_validation，目标数据：T_validation）
• 一个测试数据集，用于最终对模型的测试。（输入数据：X_test，目标数据：T_test）
# load the data from scikit-learn.
digits = datasets.load_digits()  # was missing: nothing defined `digits` below

# Note that the targets are stored as digits, these need to be
#  converted to one-hot-encoding for the output softmax layer.
T = np.zeros((digits.target.shape[0], 10))
T[np.arange(len(T)), digits.target] += 1

# Divide the data into a train and test set.
X_train, X_test, T_train, T_test = cross_validation.train_test_split(
    digits.data, T, test_size=0.4)
# Divide the test set into a validation set and final test set.
X_validation, X_test, T_validation, T_test = cross_validation.train_test_split(
    X_test, T_test, test_size=0.5)
# Plot an example of each image.
fig = plt.figure(figsize=(10, 1), dpi=100)
for i in range(10):
    # One subplot per digit class (was missing: `ax` was never created).
    ax = fig.add_subplot(1, 10, i + 1)
    ax.matshow(digits.images[i], cmap='binary')
    ax.axis('off')
plt.show()

# 网络层的泛化

• 一个线性转换层LinearLayer
• 一个Logistic函数LogisticLayer
• 一个softmax函数层SoftmaxOutputLayer

# Define the non-linear functions used
def logistic(z):
    """Logistic (sigmoid) activation, applied elementwise: 1 / (1 + e^-z)."""
    return 1. / (1. + np.exp(-z))

def logistic_deriv(y):  # Derivative of logistic function
    """Derivative of the logistic function expressed via its output y: y*(1-y)."""
    return np.multiply(1 - y, y)

def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    Subtracts each row's maximum before exponentiating so that np.exp
    cannot overflow for large logits; the shift cancels in the ratio,
    so the result is mathematically unchanged.
    """
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exps = np.exp(z_shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)
# Define the layers used in this model
class Layer(object):
    """Base class for the different layers.
    Defines base methods and documentation of methods."""

    def get_params_iter(self):
        """Return an iterator over the parameters (if any).
        The iterator has the same order as get_params_grad.
        The elements returned by the iterator are editable in-place."""
        return []

    def get_params_grad(self, X, output_grad):
        """Return a list of gradients over the parameters.
        The list has the same order as the get_params_iter iterator.
        X is the input.
        output_grad is the gradient at the output of this layer.
        """
        return []

    def get_output(self, X):
        """Perform the forward step linear transformation.
        X is the input."""
        pass

    def get_input_grad(self, Y, output_grad=None, T=None):
        """Return the gradient at the inputs of this layer.
        Y is the pre-computed output of this layer (not needed in this case).
        output_grad is the gradient at the output of this layer
        (gradient at input of next layer).
        Output layer uses targets T to compute the gradient based on the
        output error instead of output_grad."""
        pass
class LinearLayer(Layer):
    """The linear layer performs a linear transformation to its input."""

    def __init__(self, n_in, n_out):
        """Initialize hidden layer parameters.
        n_in is the number of input variables.
        n_out is the number of output variables."""
        # Small random weights to break symmetry; zero biases.
        self.W = np.random.randn(n_in, n_out) * 0.1
        self.b = np.zeros(n_out)

    def get_params_iter(self):
        """Return an iterator over the parameters."""
        # readwrite nditer elements can be updated in-place by update_params.
        return itertools.chain(
            np.nditer(self.W, op_flags=['readwrite']),
            np.nditer(self.b, op_flags=['readwrite']))

    def get_output(self, X):
        """Perform the forward step linear transformation."""
        return X.dot(self.W) + self.b

    def get_params_grad(self, X, output_grad):
        """Return a list of gradients over the parameters."""
        JW = X.T.dot(output_grad)          # gradient w.r.t. the weights
        Jb = np.sum(output_grad, axis=0)   # gradient w.r.t. the bias
        return [g for g in itertools.chain(np.nditer(JW), np.nditer(Jb))]

    def get_input_grad(self, Y, output_grad):
        """Return the gradient at the inputs of this layer."""
        return output_grad.dot(self.W.T)
class LogisticLayer(Layer):
    """The logistic layer applies the logistic function to its inputs."""

    def get_output(self, X):
        """Perform the forward step transformation."""
        return logistic(X)

    def get_input_grad(self, Y, output_grad):
        """Return the gradient at the inputs of this layer.
        Uses the chain rule: d(cost)/dX = logistic'(Y) * output_grad."""
        return np.multiply(logistic_deriv(Y), output_grad)
class SoftmaxOutputLayer(Layer):
    """The softmax output layer computes the classification probabilities at the output."""

    def get_output(self, X):
        """Perform the forward step transformation."""
        return softmax(X)

    def get_input_grad(self, Y, T):
        """Return the gradient at the inputs of this layer.
        For softmax + cross-entropy the gradient simplifies to (Y - T),
        averaged over the batch."""
        return (Y - T) / Y.shape[0]

    def get_cost(self, Y, T):
        """Return the cost at the output of this output layer.
        Mean cross-entropy over the batch."""
        return - np.multiply(T, np.log(Y)).sum() / Y.shape[0]

# 样本模型

# Define a sample model to be trained on the data
hidden_neurons_1 = 20  # Number of neurons in the first hidden-layer
hidden_neurons_2 = 20  # Number of neurons in the second hidden-layer
# Create the model: each Linear+Logistic pair forms one hidden layer,
# and the final Linear+Softmax pair produces class probabilities.
layers = [] # Define a list of layers
# Input layer -> first hidden layer (input size = number of pixel features)
layers.append(LinearLayer(X_train.shape[1], hidden_neurons_1))
layers.append(LogisticLayer())
# First hidden layer -> second hidden layer
layers.append(LinearLayer(hidden_neurons_1, hidden_neurons_2))
layers.append(LogisticLayer())
# Second hidden layer -> output layer (output size = number of classes)
layers.append(LinearLayer(hidden_neurons_2, T_train.shape[1]))
layers.append(SoftmaxOutputLayer())

# BP算法

BP算法在正向传播过程和反向传播过程中的具体细节已经在第四部分中进行了详细的解释，如果对此还有疑问，建议再去学习一下。这一部分，我们只单纯实现在多层神经网络中的BP算法。

###### 正向传播过程

# Define the forward propagation step as a method.
def forward_step(input_samples, layers):
    """
    Compute and return the forward activation of each layer in layers.
    Input:
        input_samples: A matrix of input samples (each row is an input vector)
        layers: A list of Layers
    Output:
        A list of activations where the activation at each index i+1 corresponds to
        the activation of layer i in layers. activations[0] contains the input samples.
    """
    activations = [input_samples]  # activations[0] is the network input
    for layer in layers:
        # Each layer consumes the most recently produced activation.
        activations.append(layer.get_output(activations[-1]))
    return activations

# 反向传播过程

# Define the backward propagation step as a method
def backward_step(activations, targets, layers):
    """
    Perform the backpropagation step over all the layers and return the parameter gradients.
    Input:
        activations: A list of forward step activations where the activation at
            each index i+1 corresponds to the activation of layer i in layers.
            activations[0] contains the input samples.
        targets: The output targets of the output layer.
        layers: A list of Layers corresponding that generated the outputs in activations.
    Output:
        A list of parameter gradients where the gradients at each index corresponds to
        the parameters gradients of the layer at the same index in layers.
    """
    param_grads = collections.deque()  # appendleft keeps gradients in layer order
    output_grad = None  # The error gradient at the output of the current layer
    # Propagate the error backwards through all the layers.
    #  Use reversed to iterate backwards over the list of layers.
    for layer in reversed(layers):
        Y = activations.pop()  # Get the activations of the last layer on the stack
        # Compute the error at the output layer.
        # The output layer error is calculated different then hidden layer error.
        if output_grad is None:  # output layer: gradient comes from the targets
            input_grad = layer.get_input_grad(Y, targets)
        else:  # output_grad is not None (layer is not output layer)
            input_grad = layer.get_input_grad(Y, output_grad)
        # Get the input of this layer (activations of the previous layer)
        X = activations[-1]
        # Compute the layer parameter gradients used to update the parameters
        grads = layer.get_params_grad(X, output_grad)
        param_grads.appendleft(grads)
        # Compute gradient at output of previous layer (input of current layer):
        output_grad = input_grad
    return list(param_grads)  # Return the parameter gradients

# 梯度检查

# Perform gradient checking
nb_samples_gradientcheck = 10 # Test the gradients on a subset of the data
X_temp = X_train[0:nb_samples_gradientcheck, :]
T_temp = T_train[0:nb_samples_gradientcheck, :]
# Get the parameter gradients with backpropagation
activations = forward_step(X_temp, layers)
param_grads = backward_step(activations, T_temp, layers)

# Set the small change to compute the numerical gradient
eps = 0.0001
# Compute the numerical gradients of the parameters in all layers.
for idx in range(len(layers)):
    layer = layers[idx]
    layer_backprop_grads = param_grads[idx]
    # Compute the numerical gradient for each parameter in the layer
    for p_idx, param in enumerate(layer.get_params_iter()):
        grad_backprop = layer_backprop_grads[p_idx]
        # + eps
        param += eps
        plus_cost = layers[-1].get_cost(forward_step(X_temp, layers)[-1], T_temp)
        # - eps
        param -= 2 * eps
        min_cost = layers[-1].get_cost(forward_step(X_temp, layers)[-1], T_temp)
        # reset param value
        param += eps
        # Central-difference approximation of the gradient.
        grad_num = (plus_cost - min_cost) / (2 * eps)
        # Raise error if the numerical grade is not close to the backprop gradient
        if not np.isclose(grad_num, grad_backprop):
            raise ValueError(
                'Numerical gradient of {:.6f} is not close to the '
                'backpropagation gradient of {:.6f}!'.format(
                    float(grad_num), float(grad_backprop)))
print('No gradient errors found')

# BP算法中的随机梯度下降

###### 批处理的最小数量

# Create the minibatches
batch_size = 25  # Approximately 25 samples per batch
# Use integer division: in Python 3, `/` yields a float, which
# np.array_split rejects as a section count.
nb_of_batches = X_train.shape[0] // batch_size  # Number of batches
# Create batches (X,Y) from the training set.
# Materialize the zip into a list: in Python 3 a bare zip iterator would
# be exhausted after the first training iteration, silently skipping all
# subsequent epochs.
XT_batches = list(zip(
    np.array_split(X_train, nb_of_batches, axis=0),  # X samples
    np.array_split(T_train, nb_of_batches, axis=0)))  # Y targets
###### 随机梯度下降算法的更新

下面的代码实现了参数更新操作：更新将在整个训练集上进行多次迭代，每次迭代都以最小批处理为单位取数据。每完成一次完整迭代后，模型将在验证集上进行测试。如果在验证集上经过三次完整迭代，损失函数的值都没有下降，那么我们就认为模型已经过拟合，需要终止训练；或者达到设置的最大迭代次数 300 次时，训练也会被终止。所有的损失误差值都将被保存下来，以便后续分析。

# Define a method to update the parameters
def update_params(layers, param_grads, learning_rate):
    """
    Function to update the parameters of the given layers with the given gradients
    by gradient descent with the given learning rate.
    """
    for layer, layer_backprop_grads in zip(layers, param_grads):
        for param, grad in zip(layer.get_params_iter(), layer_backprop_grads):
            # The parameter returned by the iterator point to the memory space of
            #  the original layer and can thus be modified inplace.
            param -= learning_rate * grad  # Update each parameter
# Perform backpropagation
# initalize some lists to store the cost for future analysis
minibatch_costs = []
training_costs = []
validation_costs = []

max_nb_of_iterations = 300  # Train for a maximum of 300 iterations
learning_rate = 0.1  # Gradient descent learning rate

# Train for the maximum number of iterations
for iteration in range(max_nb_of_iterations):
    for X, T in XT_batches:  # For each minibatch sub-iteration
        activations = forward_step(X, layers)  # Get the activations
        minibatch_cost = layers[-1].get_cost(activations[-1], T)  # Get cost
        minibatch_costs.append(minibatch_cost)
        # Compute the parameter gradients (this call was missing).
        param_grads = backward_step(activations, T, layers)
        update_params(layers, param_grads, learning_rate)  # Update the parameters
    # Get full training cost for future analysis (plots)
    activations = forward_step(X_train, layers)
    train_cost = layers[-1].get_cost(activations[-1], T_train)
    training_costs.append(train_cost)
    # Get full validation cost
    activations = forward_step(X_validation, layers)
    validation_cost = layers[-1].get_cost(activations[-1], T_validation)
    validation_costs.append(validation_cost)
    if len(validation_costs) > 3:
        # Stop training if the cost on the validation set doesn't decrease
        #  for 3 iterations
        if validation_costs[-1] >= validation_costs[-2] >= validation_costs[-3]:
            break

nb_of_iterations = iteration + 1  # The number of iterations that have been executed
# int(...) guards the `num` argument: np.linspace requires an integer count,
# and nb_of_batches may be a float if it came from true division.
minibatch_x_inds = np.linspace(0, nb_of_iterations, num=int(nb_of_iterations * nb_of_batches))
iteration_x_inds = np.linspace(1, nb_of_iterations, num=nb_of_iterations)
# Plot the cost over the iterations
plt.plot(minibatch_x_inds, minibatch_costs, 'k-', linewidth=0.5, label='cost minibatches')
plt.plot(iteration_x_inds, training_costs, 'r-', linewidth=2, label='cost full training set')
plt.plot(iteration_x_inds, validation_costs, 'b-', linewidth=3, label='cost validation set')
# Add labels to the plot
plt.xlabel('iteration')
plt.ylabel('$\\xi$', fontsize=15)
plt.title('Decrease of cost over backprop iteration')
plt.legend()
x1, x2, y1, y2 = plt.axis()
plt.axis((0, nb_of_iterations, 0, 2.5))
plt.grid()
plt.show()

Decrease of cost over backprop iteration

# 模型在测试集上面的性能

# Get results of test data
# The targets T_test are one-hot encoded; argmax recovers the digit label.
y_true = np.argmax(T_test, axis=1)  # Get the target outputs
activations = forward_step(X_test, layers)  # Get activation of test samples
# The predicted class is the output unit with the highest probability.
y_pred = np.argmax(activations[-1], axis=1)  # Get the predictions made by the network
test_accuracy = metrics.accuracy_score(y_true, y_pred)  # Test set accuracy
print('The accuracy on the test set is {:.2f}'.format(test_accuracy))

The accuracy on the test set is 0.96

# Show confusion table
conf_matrix = metrics.confusion_matrix(y_true, y_pred, labels=None)  # Get confustion matrix
# Plot the confusion table
class_names = ['${:d}$'.format(x) for x in range(0, 10)]  # Digit class names
fig = plt.figure()
ax = fig.add_subplot(111)  # was missing: nothing defined `ax` below
# Show class labels on each axis
ax.xaxis.tick_top()
major_ticks = range(0, 10)
minor_ticks = [x + 0.5 for x in range(0, 10)]
ax.xaxis.set_ticks(major_ticks, minor=False)
ax.yaxis.set_ticks(major_ticks, minor=False)
ax.xaxis.set_ticks(minor_ticks, minor=True)
ax.yaxis.set_ticks(minor_ticks, minor=True)
ax.xaxis.set_ticklabels(class_names, minor=False, fontsize=15)
ax.yaxis.set_ticklabels(class_names, minor=False, fontsize=15)
# Set plot labels
ax.yaxis.set_label_position("right")
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
fig.suptitle('Confusion table', y=1.03, fontsize=15)
# Show a grid to seperate digits
ax.grid(b=True, which=u'minor')
# Color each grid cell according to the number classes predicted
ax.imshow(conf_matrix, interpolation='nearest', cmap='binary')
# Show the number of samples in each cell
# range() instead of Python-2-only xrange().
for x in range(conf_matrix.shape[0]):
    for y in range(conf_matrix.shape[1]):
        color = 'w' if x == y else 'k'
        ax.text(x, y, conf_matrix[y, x], ha="center", va="center", color=color)
plt.show()

Confusion table

×