# 使用Numpy进行深度学习中5大反向传播优化算法的性能比较

2020/06/10 09:00

DeepHub

## 方法

# Plot the 3D surface (and a filled contour) of the test function
# f(x, y) = sqrt(0.1*x^2 + 2*y^2) used to compare the optimizers.
# FIX: the original snippet used `np` without importing numpy.
import numpy as np
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(13, 6))
ax = plt.axes(projection="3d")

# Sample the function on an 800x800 grid over [-8, 8] x [-8, 8].
start, stop, n_values = -8, 8, 800
x_vals = np.linspace(start, stop, n_values)
y_vals = np.linspace(start, stop, n_values)
X, Y = np.meshgrid(x_vals, y_vals)
Z = np.sqrt(0.1 * X**2 + 2 * Y**2)

plt.contourf(X, Y, Z)
ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                cmap='winter', edgecolor='none')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')

2-Momentum

4-RMSprop

# Vanilla gradient descent on f(x1, x2) = 0.05*x1^2 + 2*x2^2,
# whose partial derivatives are 0.2*x1 and 4*x2.
lr = 0.4            # learning rate
x1, x2 = -4, -6     # starting point
l1_gd, l2_gd = [], []   # recorded trajectory of (x1, x2)

for _ in range(20):
    # record the current position before stepping
    l1_gd.append(x1)
    l2_gd.append(x2)
    # move each coordinate against its partial derivative
    x1 -= lr * 0.2 * x1
    x2 -= lr * 4 * x2

# Contour plot of the level sets with the gradient-descent trajectory
# (first 15 iterates of l1_gd / l2_gd) overlaid.
import numpy as np
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(13, 6))
# Axes rectangle is given in figure-fraction coordinates (0..1).
# FIX: the original used left=100, which placed the axes far outside
# the visible figure.
left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
ax = fig.add_axes([left, bottom, width, height])

start, stop, n_values = -8, 8, 100
x_vals = np.linspace(start, stop, n_values)
y_vals = np.linspace(start, stop, n_values)
X, Y = np.meshgrid(x_vals, y_vals)
Z = np.sqrt(0.1 * X**2 + 2 * Y**2)

plt.contourf(X, Y, Z)
plt.plot(l1_gd[:15], l2_gd[:15], color="red", marker="*",
         markeredgecolor="black", linewidth=3, label="Gradient Descent")

# FIX: removed the two stray plt.figure(figsize=(15,10)) calls that
# only opened blank windows.
ax.set_title('Level Sets of the Function', size=20)
ax.set_xlabel('x (cm)')
ax.set_ylabel('y (cm)')
ax.legend()
plt.show()

## 2、Momentum

# Gradient descent with momentum (gamma = 0.5) on the same quadratic:
# the velocity accumulates a decaying sum of past gradients.
gamma, lr = 0.5, 0.4
v1 = v2 = 0                 # velocity per coordinate
x1, x2 = -4, -6             # starting point
l1_gd_m, l2_gd_m = [], []   # recorded trajectory

for _ in range(20):
    l1_gd_m.append(x1)
    l2_gd_m.append(x2)
    # velocity = decayed previous velocity + current gradient
    v1 = gamma * v1 + 0.2 * x1
    v2 = gamma * v2 + 4 * x2
    # step along the (negative) velocity
    x1 -= lr * v1
    x2 -= lr * v2

# Contour plot comparing the plain GD and momentum trajectories.
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(13, 6))
# FIX: left must be a figure fraction (0..1); the original left=100
# pushed the axes completely out of the figure.
left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
ax = fig.add_axes([left, bottom, width, height])

start, stop, n_values = -8, 8, 100
x_vals = np.linspace(start, stop, n_values)
y_vals = np.linspace(start, stop, n_values)
X, Y = np.meshgrid(x_vals, y_vals)
Z = np.sqrt(0.1 * X**2 + 2 * Y**2)

plt.contourf(X, Y, Z)
plt.plot(l1_gd[:15], l2_gd[:15], color="red", marker="*",
         markeredgecolor="black", linewidth=3, label="Gradient Descent")
plt.plot(l1_gd_m[:15], l2_gd_m[:15], color="yellow", marker="*",
         markeredgecolor="orange", linewidth=3,
         label="Gradient Descent (Momentum =0.5)")

# FIX: removed the duplicated blank plt.figure(figsize=(15,10)) calls.
ax.set_title('Level Sets of the Function', size=20)
ax.set_xlabel('x (cm)')
ax.set_ylabel('y (cm)')
ax.legend()
plt.show()

Gₜ 是一个对角矩阵，其对角线元素是截至当前步为止各参数梯度的平方和，ϵ 是避免除零的平滑项。此外，⊙ 表示矩阵与向量的逐元素乘积运算。

# Adagrad: accumulate the squared gradient per coordinate and divide
# the learning rate by sqrt(accumulated sum + epsilon).
# FIX: the original used `math` and `c` without ever importing/defining
# them, so it crashed with NameError; `c` is the epsilon smoothing term.
import math

c = 1e-8            # epsilon smoothing term (avoids division by zero)
lr = 0.4
v1, v2 = 0, 0       # per-coordinate running sums of squared gradients
x1, x2 = -4, -6     # starting point
l1_adagrad, l2_adagrad = [], []

for i in range(20):
    l1_adagrad.append(x1)
    l2_adagrad.append(x2)
    # accumulate squared gradients (never decays -> lr shrinks over time)
    v1 = v1 + (0.2 * x1) ** 2
    v2 = v2 + (4 * x2) ** 2
    # per-coordinate adaptive step
    x1 = x1 - (lr / math.sqrt(v1 + c)) * 0.2 * x1
    x2 = x2 - (lr / math.sqrt(v2 + c)) * 4 * x2

# Contour plot comparing GD, momentum and Adagrad trajectories.
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(13, 6))
# FIX: left must be a figure fraction (0..1), not 100.
left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
ax = fig.add_axes([left, bottom, width, height])

start, stop, n_values = -8, 8, 100
x_vals = np.linspace(start, stop, n_values)
y_vals = np.linspace(start, stop, n_values)
X, Y = np.meshgrid(x_vals, y_vals)
Z = np.sqrt(0.1 * X**2 + 2 * Y**2)

plt.contourf(X, Y, Z)
plt.plot(l1_gd[:15], l2_gd[:15], color="red", marker="*",
         markeredgecolor="black", linewidth=3, label="Gradient Descent")
plt.plot(l1_gd_m[:15], l2_gd_m[:15], color="yellow", marker="*",
         markeredgecolor="orange", linewidth=3,
         label="Gradient Descent (Momentum =0.5)")
plt.plot(l1_adagrad[:15], l2_adagrad[:15], color="blue", marker="*",
         markeredgecolor="black", linewidth=3, label="Adagrad")

# FIX: removed the duplicated blank plt.figure(figsize=(15,10)) calls.
ax.set_title('Level Sets of the Function', size=20)
ax.set_xlabel('x (cm)')
ax.set_ylabel('y (cm)')
ax.legend()
plt.show()

## 4、RMSprop

# RMSprop: like Adagrad, but the squared-gradient accumulator is an
# exponential moving average (decay gamma), so the step size no longer
# shrinks monotonically.
# FIX: the original used `math` and `c` without importing/defining
# them; `c` is the epsilon smoothing term.
import math

c = 1e-8            # epsilon smoothing term (avoids division by zero)
gamma, lr = 0.9, 0.4
v1, v2 = 0, 0       # EMA of squared gradients per coordinate
x1, x2 = -4, -6     # starting point
l1, l2 = [], []

for i in range(20):
    l1.append(x1)
    l2.append(x2)
    # exponential moving average of the squared gradients
    v1 = gamma * v1 + (1 - gamma) * (0.2 * x1) ** 2
    v2 = gamma * v2 + (1 - gamma) * (4 * x2) ** 2
    # per-coordinate adaptive step
    x1 = x1 - (lr / math.sqrt(v1 + c)) * 0.2 * x1
    x2 = x2 - (lr / math.sqrt(v2 + c)) * 4 * x2

# Contour plot comparing GD, momentum, Adagrad and RMSprop trajectories.
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(13, 6))
# FIX: left must be a figure fraction (0..1), not 100.
left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
ax = fig.add_axes([left, bottom, width, height])

start, stop, n_values = -8, 8, 100
x_vals = np.linspace(start, stop, n_values)
y_vals = np.linspace(start, stop, n_values)
X, Y = np.meshgrid(x_vals, y_vals)
Z = np.sqrt(0.1 * X**2 + 2 * Y**2)

plt.contourf(X, Y, Z)
plt.plot(l1_gd[:15], l2_gd[:15], color="red", marker="*",
         markeredgecolor="black", linewidth=3, label="Gradient Descent")
plt.plot(l1_gd_m[:15], l2_gd_m[:15], color="yellow", marker="*",
         markeredgecolor="orange", linewidth=3,
         label="Gradient Descent (Momentum =0.5)")
plt.plot(l1_adagrad[:15], l2_adagrad[:15], color="blue", marker="*",
         markeredgecolor="black", linewidth=3, label="Adagrad")
plt.plot(l1[:15], l2[:15], color="g", marker="*",
         markeredgecolor="b", linewidth=3, label="RMSprop Algorithm")

# FIX: removed the duplicated blank plt.figure(figsize=(15,10)) calls.
ax.set_title('Level Sets of the Function', size=20)
ax.set_xlabel('x (cm)')
ax.set_ylabel('y (cm)')
ax.legend()
plt.show()

# Adam: keeps an EMA of the gradients (first moment, decay gamma) and
# of the squared gradients (second moment, decay beta), with bias
# correction for both, and stops early once close to the minimum.
# FIX: the original used `math`, `np` and `c` without importing/defining
# them; `c` is the epsilon smoothing term.
import math
import numpy as np

c = 1e-8                        # epsilon smoothing term
gamma, beta, lr = 0.9, 0.999, 0.4
v1, v2, s1, s2 = 0, 0, 0, 0     # first/second moment estimates
x1, x2 = -6, -6                 # starting point
l1_adam, l2_adam = [], []

for i in range(20):
    l1_adam.append(x1)
    l2_adam.append(x2)
    # first moment: EMA of the gradients
    v1 = gamma * v1 + (1 - gamma) * (0.2 * x1)
    v2 = gamma * v2 + (1 - gamma) * (4 * x2)
    # second moment: EMA of the squared gradients
    s1 = beta * s1 + (1 - beta) * (0.2 * x1) ** 2
    s2 = beta * s2 + (1 - beta) * (4 * x2) ** 2
    # bias correction (both EMAs start at 0 and are biased toward 0)
    m_hat_v1 = v1 / (1 - np.power(gamma, i + 1))
    m_hat_v2 = v2 / (1 - np.power(gamma, i + 1))
    s_hat_s1 = s1 / (1 - np.power(beta, i + 1))
    s_hat_s2 = s2 / (1 - np.power(beta, i + 1))
    # parameter update
    x1 = x1 - lr * (m_hat_v1 / math.sqrt(s_hat_s1 + c))
    x2 = x2 - lr * (m_hat_v2 / math.sqrt(s_hat_s2 + c))
    print(x1, x2)
    # early stop once both coordinates are near the minimum at (0, 0)
    if abs(x1) <= 0.1 and abs(x2) < 0.1:
        break

# Final contour plot comparing all five optimizer trajectories.
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(13, 6))
# FIX: left must be a figure fraction (0..1), not 100.
left, bottom, width, height = 0.1, 0.1, 0.8, 0.8
ax = fig.add_axes([left, bottom, width, height])

start, stop, n_values = -8, 8, 100
x_vals = np.linspace(start, stop, n_values)
y_vals = np.linspace(start, stop, n_values)
X, Y = np.meshgrid(x_vals, y_vals)
Z = np.sqrt(0.1 * X**2 + 2 * Y**2)

plt.contourf(X, Y, Z)
plt.plot(l1_gd[:15], l2_gd[:15], color="red", marker="*",
         markeredgecolor="black", linewidth=3, label="Gradient Descent")
plt.plot(l1_gd_m[:15], l2_gd_m[:15], color="yellow", marker="*",
         markeredgecolor="orange", linewidth=3,
         label="Gradient Descent (Momentum =0.5)")
plt.plot(l1_adagrad[:15], l2_adagrad[:15], color="blue", marker="*",
         markeredgecolor="black", linewidth=3, label="Adagrad")
plt.plot(l1[:15], l2[:15], color="g", marker="*",
         markeredgecolor="b", linewidth=3, label="RMSprop Algorithm")
plt.plot(l1_adam[:20], l2_adam[:20], color="Brown", marker="*",
         markeredgecolor="pink", linewidth=3, label="Adam")

# FIX: removed the duplicated blank plt.figure(figsize=(15,10)) calls.
ax.set_title('Level Sets of the Function', size=20)
ax.set_xlabel('x (cm)')
ax.set_ylabel('y (cm)')
ax.legend()
plt.show()

DeepHub

0
10 收藏

0 评论
10 收藏
0