# -*- coding: utf-8 -*-
"""
Created on Thu Jun 13 10:38:32 2019

@author: xingzhi
"""
import math
import numpy as np
import sklearn
import sklearn.datasets
import matplotlib.pyplot as plt
import sklearn.linear_model
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D

# Plot the decision boundary of a classifier over the extent of the samples.
def plot_decision_boundary(pred_func):
    """Fill the predicted class regions and overlay the training samples.

    Reads the module-level sample matrix ``X`` (columns 0 and 1 are the plot
    coordinates) and the label vector ``y``; draws on the current axes.

    Args:
        pred_func: Callable taking an array of (x0, x1) grid points and
            returning a 4-tuple whose last item holds one predicted label
            per grid point.
    """
    margin = .5   # blank border kept around the samples
    step = 0.01   # grid resolution
    col0, col1 = X[:, 0], X[:, 1]
    x_ticks = np.arange(col0.min() - margin, col0.max() + margin, step)
    y_ticks = np.arange(col1.min() - margin, col1.max() + margin, step)
    # Coordinate matrices of every grid node
    xx, yy = np.meshgrid(x_ticks, y_ticks)
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    # Only the labels are needed here; the layer outputs are discarded
    _, _, _, labels = pred_func(grid_points)
    labels = labels.reshape(xx.shape)
    # Filled contours for the class regions, then the samples on top
    plt.contourf(xx, yy, labels, cmap=plt.cm.Spectral)
    plt.scatter(col0, col1, c=y, cmap=plt.cm.Spectral)

# Plot a two-dimensional slice of one layer's feature space.
def plot_transformed_representation(samples,ind1,ind2,pred_func,typ):
    """Scatter a grid and the samples in the coordinates of two neurons.

    A regular grid covering the extent of the module-level ``X`` is pushed
    through ``pred_func``; the selected layer output of every grid point is
    drawn (coloured by predicted label), and the already-transformed
    ``samples`` are drawn on top (coloured by the module-level ``y``).

    Args:
        samples: Layer output of the training samples, one row per sample.
        ind1: Column (neuron) used for the horizontal axis.
        ind2: Column (neuron) used for the vertical axis.
        pred_func: Callable taking grid points and returning
            ``(z1, a1, z2, labels)``.
        typ: Which grid output to draw: "z1", "a1" or "z2". Any other value
            draws only the samples.
    """
    margin = .5
    step = 0.05  # coarser than the decision-boundary grid so single points stay visible
    x_ticks = np.arange(X[:, 0].min() - margin, X[:, 0].max() + margin, step)
    y_ticks = np.arange(X[:, 1].min() - margin, X[:, 1].max() + margin, step)
    xx, yy = np.meshgrid(x_ticks, y_ticks)
    z1_grid, a_grid, z2_grid, labels = pred_func(np.c_[xx.ravel(), yy.ravel()])
    labels = labels.reshape(xx.shape)
    # Map the layer tag to the matching grid output
    layer_outputs = {"z1": z1_grid, "a1": a_grid, "z2": z2_grid}
    if typ in layer_outputs:
        grid_feat = layer_outputs[typ]
        plt.scatter(grid_feat[:, ind1], grid_feat[:, ind2], c=labels.ravel(),
                    marker="s", cmap=plt.cm.Spectral, alpha=0.2)
    plt.scatter(samples[:, ind1], samples[:, ind2], c=y, cmap=plt.cm.Spectral, s=100)

# Compute the regularized mean cross-entropy loss on the training set.
def calculate_loss(model):
    """Return the loss of the tanh network on the module-level data.

    Reads the module-level ``X``, ``y``, ``num_examples`` and ``reg_lambda``.

    Args:
        model: Dict holding the parameters under 'W1', 'b1', 'W2', 'b2'.

    Returns:
        ``(cross-entropy summed over the samples
        + reg_lambda / 2 * (sum(W1**2) + sum(W2**2))) / num_examples``.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward pass
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    # Log-softmax via log-sum-exp: shifting by the row maximum keeps np.exp
    # from overflowing (inf / inf -> nan) and avoids taking log(0).
    shifted = z2 - np.max(z2, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    # Negative log-likelihood of the true class of every sample
    data_loss = -np.sum(log_probs[range(num_examples), y])
    # L2 penalty on the weights (biases are not regularized)
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./num_examples * data_loss

# Predict the class (0 or 1) of every input row with the tanh network.
def predict(model, x):
    """Run a forward pass and return every layer output plus the labels.

    Args:
        model: Dict holding the parameters under 'W1', 'b1', 'W2', 'b2'.
        x: Input samples, one per row.

    Returns:
        Tuple ``(z1, a1, z2, labels)``: hidden-layer pre-activation, tanh
        activation, output-layer scores, and the index of the most probable
        class for every row.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward pass
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    # Shift by the row maximum before exponentiating: the softmax value is
    # unchanged, but np.exp can no longer overflow and turn every
    # probability into nan (which would make argmax return 0).
    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return z1,a1,z2,np.argmax(probs, axis=1)

# Train the 2-layer tanh network with full-batch gradient descent.
def build_model(nn_hdim, num_passes=20000, print_loss=False):
    """Learn the network parameters and return them with the final activations.

    Reads the module-level ``X``, ``y``, ``num_examples``, ``nn_input_dim``,
    ``nn_output_dim``, ``epsilon`` (learning rate) and ``reg_lambda``.
    Reseeds NumPy's global RNG with 0 before initializing the weights.

    Args:
        nn_hdim: Number of hidden-layer neurons.
        num_passes: Number of gradient-descent passes over the full data set.
        print_loss: If True, print the loss every 1000 passes.

    Returns:
        Tuple ``(z1, a1, z2, model)``: the layer outputs of ``X`` computed
        with the final parameters, and the parameter dict with keys
        'W1', 'b1', 'W2', 'b2'.
    """
    # Random initialization of the weights, zero biases
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    # The arrays are updated in place below, so this dict always exposes
    # the current parameters.
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    for i in range(num_passes):
        # Forward pass
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        # Row-wise shift keeps np.exp from overflowing; softmax is unchanged
        exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Backward pass (gradients are summed over the samples, not averaged)
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))  # tanh' = 1 - tanh^2
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        # L2 regularization (biases are not regularized)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1
        # Gradient-descent step, in place
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2
        # Optional progress report
        if print_loss and i % 1000 == 0:
            print ("Loss after iteration %i: %f" %(i, calculate_loss(model)))
    # Final forward pass so the returned activations match the returned
    # parameters (and are defined even when num_passes is 0).
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    return z1,a1,z2,model

### Moons data, 3 hidden units
# Seed the global RNG so the generated data set is reproducible
np.random.seed(0)

# Generate the data set
X, y = sklearn.datasets.make_moons(100, noise=0.20)

# Network dimensions
num_examples = len(X)  # number of training samples
nn_input_dim = 2       # input-layer size
nn_output_dim = 2      # output-layer size

# Gradient-descent hyper-parameters
epsilon = 0.01     # learning rate
reg_lambda = 0.01  # regularization strength

# Train the model
z1_xy,a_xy,z2_xy,model = build_model(3, print_loss=False)

# Create the figure first and only then adjust the subplot spacing:
# plt.subplots_adjust acts on the current figure, so calling it before
# plt.figure() only spawned an empty default figure.
plt.figure(figsize=(17,15))
plt.subplots_adjust(wspace=1, hspace=1)

# Raw samples
plt.subplot(3,2,1)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)
plt.title("Samples (Moons)")

# Decision boundary
plt.subplot(3,2,2)
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3(Moons)")

# Transformed feature space, one panel per pair of hidden neurons:
# middle row after the linear map (fc), bottom row after tanh.
neuron_pairs = [(0, 1), (1, 2), (0, 2)]
for col, (n1, n2) in enumerate(neuron_pairs):
    plt.subplot(3, 3, 4 + col)
    plot_transformed_representation(z1_xy, n1, n2, lambda x: predict(model, x), "z1")
    plt.title("Neurons %d&%d of hidden layer(fc)" % (n1, n2))
for col, (n1, n2) in enumerate(neuron_pairs):
    plt.subplot(3, 3, 7 + col)
    plot_transformed_representation(a_xy, n1, n2, lambda x: predict(model, x), "a1")
    plt.title("Neurons %d&%d of hidden layer(tanh)" % (n1, n2))
plt.show()

# Three-dimensional feature space
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.05
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
z1_grid,a_grid,z2_grid,Z = predict(model,np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
fig = plt.figure(figsize=(18,8))

# Left panel: after the linear map; right panel: after tanh
views_3d = [
    (z1_xy, z1_grid, "3D Transformed Representation (fc)"),
    (a_xy, a_grid, "3D Transformed Representation (tanh)"),
]
for panel, (pts, grid_pts, panel_title) in enumerate(views_3d, start=1):
    ax = fig.add_subplot(1, 2, panel, projection="3d")
    ax.scatter(pts[:,0], pts[:,1], pts[:,2], c=y, cmap=plt.cm.Spectral, s=100)
    ax.scatter(grid_pts[:,0], grid_pts[:,1], grid_pts[:,2], c=Z.ravel(),
               marker="s", cmap=plt.cm.Spectral, alpha=0.3)
    ax.set_xlabel('Neurons 0')
    ax.set_ylabel('Neurons 1')
    ax.set_zlabel('Neurons 2')
    plt.title(panel_title)


def update(i):
    """Animation callback: set the azimuth of the current ``ax`` to ``i * 10``."""
    ax.azim = i * 10
    return ax

# Optional GIF export of the rotating view (disabled):
# ani = FuncAnimation(fig, update, 25, interval=500, blit=False)
# ani.save('3D.gif', writer='imagemagick')
plt.show()
...;

<Figure size 432x288 with 0 Axes>

搭建 2-3-2 的网络模型（输入层 2 个节点、隐层 3 个节点、输出层 2 个节点），对随机生成的 100 个 Moons 型样本进行分类；保留学习到的参数，绘制隐层线性变换后以及激活函数变换后的特征空间，观察其形态分布。由于隐层有三个节点，先对每两个节点分别绘制二维图，再给出三维空间中的结果。

### Circles data, 3 hidden units
# Seed the global RNG (make_circles additionally gets its own random_state)
np.random.seed(0)

# Generate the data set
X, y = sklearn.datasets.make_circles(n_samples=100,noise=0.2,factor=0.2,random_state=1)

# Network dimensions
num_examples = len(X)  # number of training samples
nn_input_dim = 2       # input-layer size
nn_output_dim = 2      # output-layer size

# Gradient-descent hyper-parameters
epsilon = 0.01     # learning rate
reg_lambda = 0.01  # regularization strength

# Train the model
z1_xy,a_xy,z2_xy,model = build_model(3, print_loss=False)

# Create the figure first and only then adjust the subplot spacing:
# plt.subplots_adjust acts on the current figure, so calling it before
# plt.figure() only spawned an empty default figure.
plt.figure(figsize=(17,15))
plt.subplots_adjust(wspace=1, hspace=1)

# Raw samples
plt.subplot(3,2,1)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)
plt.title("Samples (Circles)")

# Decision boundary
plt.subplot(3,2,2)
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3(Circles)")

# Transformed feature space, one panel per pair of hidden neurons:
# middle row after the linear map (fc), bottom row after tanh.
neuron_pairs = [(0, 1), (1, 2), (0, 2)]
for col, (n1, n2) in enumerate(neuron_pairs):
    plt.subplot(3, 3, 4 + col)
    plot_transformed_representation(z1_xy, n1, n2, lambda x: predict(model, x), "z1")
    plt.title("Neurons %d&%d of hidden layer(fc)" % (n1, n2))
for col, (n1, n2) in enumerate(neuron_pairs):
    plt.subplot(3, 3, 7 + col)
    plot_transformed_representation(a_xy, n1, n2, lambda x: predict(model, x), "a1")
    plt.title("Neurons %d&%d of hidden layer(tanh)" % (n1, n2))
# No plt.show() here: this figure is displayed together with the 3-D one below.

# Three-dimensional feature space
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.05
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
z1_grid,a_grid,z2_grid,Z = predict(model,np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
fig = plt.figure(figsize=(18,8))

# Left panel: after the linear map; right panel: after tanh
views_3d = [
    (z1_xy, z1_grid, "3D Transformed Representation (fc)"),
    (a_xy, a_grid, "3D Transformed Representation (tanh)"),
]
for panel, (pts, grid_pts, panel_title) in enumerate(views_3d, start=1):
    ax = fig.add_subplot(1, 2, panel, projection="3d")
    ax.scatter(pts[:,0], pts[:,1], pts[:,2], c=y, cmap=plt.cm.Spectral, s=100)
    ax.scatter(grid_pts[:,0], grid_pts[:,1], grid_pts[:,2], c=Z.ravel(),
               marker="s", cmap=plt.cm.Spectral, alpha=0.3)
    ax.set_xlabel('Neurons 0')
    ax.set_ylabel('Neurons 1')
    ax.set_zlabel('Neurons 2')
    plt.title(panel_title)


def update(i):
    """Animation callback: set the azimuth of the current ``ax`` to ``i * 10``."""
    ax.azim = i * 10
    return ax

# Optional GIF export of the rotating view (disabled):
# ani = FuncAnimation(fig, update, 25, interval=500, blit=False)
# ani.save('3D.gif', writer='imagemagick')
plt.show()
...;

<Figure size 432x288 with 0 Axes>

搭建 2-3-2 的网络模型，对随机生成的 100 个 Circles 型样本进行分类；保留学习到的参数，绘制隐层线性变换后以及激活函数变换后的特征空间，观察其形态分布。由于隐层有三个节点，先对每两个节点分别绘制二维图，再给出三维空间中的结果。从以上两组结果来看，线性变换对原始样本空间的改变相对较小，而激活函数变换后特征空间的形态变化较大。在三维空间的变换结果中，可以清晰地看到用一个二维平面即可将两组数据分开；二维空间内的特征分布可视作三维空间在不同平面上的投影，而由 fc 到 tanh 的形态变化总是可以用 tanh 函数（激活函数）的性质来解释。

既然 2-3-2 的网络可以较好地区分这两组数据，那么更少的神经元是否也能做到？

### Moons data, 2 hidden units
# Seed the global RNG so the generated data set is reproducible
np.random.seed(0)

# Generate the data set
X, y = sklearn.datasets.make_moons(100, noise=0.20)

# Network dimensions
num_examples = len(X)  # number of training samples
nn_input_dim = 2       # input-layer size
nn_output_dim = 2      # output-layer size

# Gradient-descent hyper-parameters
epsilon = 0.01     # learning rate
reg_lambda = 0.01  # regularization strength

# Train the model
z1_xy,a_xy,z2_xy,model = build_model(2, print_loss=False)

# Create the figure first and only then adjust the subplot spacing:
# plt.subplots_adjust acts on the current figure, so calling it before
# plt.figure() only spawned an empty default figure.
plt.figure(figsize=(17,11))
plt.subplots_adjust(wspace=1, hspace=1)

# Raw samples
plt.subplot(2,2,1)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)
plt.title("Samples (Moons)")

# Decision boundary
plt.subplot(2,2,2)
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 2(Moons)")

# Transformed feature space: hidden layer after the linear map, hidden
# layer after tanh, and the output layer.
stages = [
    (z1_xy, "z1", "Neurons 0&1 of hidden layer(fc)"),
    (a_xy, "a1", "Neurons 0&1 of hidden layer(tanh)"),
    (z2_xy, "z2", "Neurons 0&1 of output layer(fc)"),
]
for col, (stage_samples, stage_tag, stage_title) in enumerate(stages):
    plt.subplot(2, 3, 4 + col)
    plot_transformed_representation(stage_samples, 0, 1, lambda x: predict(model, x), stage_tag)
    plt.title(stage_title)

plt.show()
...;

<Figure size 432x288 with 0 Axes>

### Circles data, 2 hidden units
# Seed the global RNG (make_circles additionally gets its own random_state)
np.random.seed(0)

# Generate the data set
X, y = sklearn.datasets.make_circles(n_samples=100,noise=0.2,factor=0.2,random_state=1)

# Network dimensions
num_examples = len(X)  # number of training samples
nn_input_dim = 2       # input-layer size
nn_output_dim = 2      # output-layer size

# Gradient-descent hyper-parameters
epsilon = 0.01     # learning rate
reg_lambda = 0.01  # regularization strength

# Train the model
z1_xy,a_xy,z2_xy,model = build_model(2, print_loss=False)

# Create the figure first and only then adjust the subplot spacing:
# plt.subplots_adjust acts on the current figure, so calling it before
# plt.figure() only spawned an empty default figure.
plt.figure(figsize=(17,11))
plt.subplots_adjust(wspace=1, hspace=1)

# Raw samples
plt.subplot(2,2,1)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)
plt.title("Samples (Circles)")

# Decision boundary
plt.subplot(2,2,2)
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 2(Circles)")

# Transformed feature space: hidden layer after the linear map, hidden
# layer after tanh, and the output layer.
stages = [
    (z1_xy, "z1", "Neurons 0&1 of hidden layer(fc)"),
    (a_xy, "a1", "Neurons 0&1 of hidden layer(tanh)"),
    (z2_xy, "z2", "Neurons 0&1 of output layer(fc)"),
]
for col, (stage_samples, stage_tag, stage_title) in enumerate(stages):
    plt.subplot(2, 3, 4 + col)
    plot_transformed_representation(stage_samples, 0, 1, lambda x: predict(model, x), stage_tag)
    plt.title(stage_title)

plt.show()
...;

<Figure size 432x288 with 0 Axes>

可以看出，不同类型的数据集所需的神经元数量是不同的：两个神经元已经足以把 Moons 型数据分开，但对 Circles 型数据则不行。如何根据数据恰到好处地设计出合适的神经网络？有人给出了如下回答：
·基于数据，画出期望决策边界
·将决策边界表示为一组直线
·直线的数量等于在第一隐藏层中的隐藏神经元数量
·为了连接之前创建的直线，增加新的隐藏层（每次需要在前一个隐藏层中创建直线间的连接时，都需要增加新的隐藏层）
·每个新的隐藏层中隐藏神经元的数量等于要建立的连接的数量
但是，根据已有的经验来看，似乎并不是一个神经元只能“划出一条线”
以下是一个"六边形"的例子:

## ReLU 激活函数 六边形过程分解
# ReLU activation
def relu(z):
    """Return the element-wise maximum of ``z`` and 0.0."""
    floor = 0.0
    return np.maximum(z, floor)

# Plot the decision boundary of a classifier over the extent of the samples.
def plot_decision_boundary(pred_func):
    """Fill the predicted class regions and overlay the training samples.

    Uses the module-level ``X`` (columns 0 and 1 as plot coordinates) and
    ``y`` (sample labels) and draws on the current axes.

    Args:
        pred_func: Callable taking an array of (x0, x1) grid points and
            returning a 4-tuple whose last item holds one predicted label
            per grid point.
    """
    pad, step = .5, 0.01  # blank border around the samples, grid resolution
    # One tick vector per input dimension
    ticks = [np.arange(X[:, k].min() - pad, X[:, k].max() + pad, step) for k in (0, 1)]
    xx, yy = np.meshgrid(ticks[0], ticks[1])
    # Evaluate the classifier on every grid node; keep only the labels
    _, _, _, grid_labels = pred_func(np.c_[xx.ravel(), yy.ravel()])
    grid_labels = grid_labels.reshape(xx.shape)
    # Class regions as filled contours, samples on top
    plt.contourf(xx, yy, grid_labels, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)

# Compute the regularized mean cross-entropy loss on the training set.
def calculate_loss(model):
    """Return the loss of the ReLU network on the module-level data.

    Reads the module-level ``X``, ``y``, ``num_examples`` and ``reg_lambda``.

    Args:
        model: Dict holding the parameters under 'W1', 'b1', 'W2', 'b2'.

    Returns:
        ``(cross-entropy summed over the samples
        + reg_lambda / 2 * (sum(W1**2) + sum(W2**2))) / num_examples``.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward pass
    z1 = X.dot(W1) + b1
    a1 = relu(z1)
    z2 = a1.dot(W2) + b2
    # Log-softmax via log-sum-exp: ReLU activations are unbounded, so the
    # scores are shifted by the row maximum to keep np.exp from overflowing
    # (inf / inf -> nan) and to avoid taking log(0).
    shifted = z2 - np.max(z2, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    # Negative log-likelihood of the true class of every sample
    data_loss = -np.sum(log_probs[range(num_examples), y])
    # L2 penalty on the weights (biases are not regularized)
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./num_examples * data_loss

# Predict the class (0 or 1) of every input row with the ReLU network.
def predict(model, x):
    """Run a forward pass and return every layer output plus the labels.

    Args:
        model: Dict holding the parameters under 'W1', 'b1', 'W2', 'b2'.
        x: Input samples, one per row.

    Returns:
        Tuple ``(z1, a1, z2, labels)``: hidden-layer pre-activation, ReLU
        activation, output-layer scores, and the index of the most probable
        class for every row.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward pass
    z1 = x.dot(W1) + b1
    a1 = relu(z1)
    z2 = a1.dot(W2) + b2
    # Shift by the row maximum before exponentiating: the softmax value is
    # unchanged, but np.exp can no longer overflow and turn every
    # probability into nan (which would make argmax return 0).
    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return z1,a1,z2,np.argmax(probs, axis=1)

# Train the 2-layer ReLU network with full-batch gradient descent.
def build_model(nn_hdim, num_passes=50000, print_loss=False):
    """Learn the network parameters and return them with the final activations.

    Reads the module-level ``X``, ``y``, ``num_examples``, ``nn_input_dim``,
    ``nn_output_dim``, ``epsilon`` (learning rate) and ``reg_lambda``.
    Reseeds NumPy's global RNG with 0 before initializing the weights.

    Args:
        nn_hdim: Number of hidden-layer neurons.
        num_passes: Number of gradient-descent passes over the full data set.
        print_loss: If True, print the loss every 1000 passes.

    Returns:
        Tuple ``(z1, a1, z2, model)``: the layer outputs of ``X`` computed
        with the final parameters, and the parameter dict with keys
        'W1', 'b1', 'W2', 'b2'.
    """
    # Random initialization of the weights, zero biases
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    # The arrays are updated in place below, so this dict always exposes
    # the current parameters.
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    for i in range(num_passes):
        # Forward pass
        z1 = X.dot(W1) + b1
        a1 = relu(z1)
        z2 = a1.dot(W2) + b2
        # Row-wise shift keeps np.exp from overflowing; softmax is unchanged
        exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Backward pass (gradients are averaged over the samples)
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        delta3 /= num_examples
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = np.dot(delta3, W2.T)
        delta2[a1 <= 0] = 0  # ReLU passes no gradient where it is inactive
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0, keepdims=True)
        # L2 regularization (biases are not regularized)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1
        # Gradient-descent step, in place
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2
        # Optional progress report
        if print_loss and i % 1000 == 0:
            print ("Loss after iteration %i: %f" %(i, calculate_loss(model)))
    # Final forward pass so the returned activations match the returned
    # parameters (and are defined even when num_passes is 0).
    z1 = X.dot(W1) + b1
    a1 = relu(z1)
    z2 = a1.dot(W2) + b2
    return z1,a1,z2,model


### Circles data, 3 hidden units (ReLU network)
# Seed the global RNG (make_circles additionally gets its own random_state)
np.random.seed(0)

# Generate the data set
X, y = sklearn.datasets.make_circles(n_samples=400,noise=0.2,factor=0.2,random_state=1)

# Network dimensions
num_examples = len(X)  # number of training samples
nn_input_dim = 2       # input-layer size
nn_output_dim = 2      # output-layer size

# Gradient-descent hyper-parameters
epsilon = 0.01     # learning rate
reg_lambda = 0.01  # regularization strength

# Train the model
z1_xy,a_xy,z2_xy,model = build_model(3, print_loss=False)

# Create the figure first and only then adjust the subplot spacing:
# plt.subplots_adjust acts on the current figure, so calling it before
# plt.figure() only spawned an empty default figure.
plt.figure(figsize=(17,15))
plt.subplots_adjust(wspace=1, hspace=1)

# Raw samples
plt.subplot(3,2,1)
plt.scatter(X[:,0], X[:,1], s=40, c=y, cmap=plt.cm.Spectral)
plt.title("Samples (Circles)")

# Decision boundary
plt.subplot(3,2,2)
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3(Circles) ReLU")

# "Folding" plots: evaluate the network on a regular grid
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
h = 0.05
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
z1_grid,a_grid,z2_grid,Z = predict(model,np.c_[xx.ravel(), yy.ravel()])

# ReLU output of hidden neuron 0 over the grid, then the same surface
# scaled by C = 0.6, 2 and -1.5. No cmap is passed: without a `c` argument
# matplotlib ignores it (and warns).
fig = plt.figure(figsize=(17,4))
fold_surface = a_grid[:,0].reshape(xx.shape)
fold_panels = [
    (1, 'Z', "RelU : Folding plane Z"),
    (0.6, 'Z2', "RelU : Folding plane C*Z (C=0.6)"),
    (2, 'Z2', "RelU : Folding plane C*Z (C=2)"),
    (-1.5, 'Z2', "RelU : Folding plane C*Z (C=-1.5)"),
]
for panel, (coef, z_label, panel_title) in enumerate(fold_panels, start=1):
    ax = fig.add_subplot(1, 4, panel, projection="3d")
    ax.scatter(xx, yy, coef * fold_surface, marker="s", alpha=0.3)
    ax.set_xlabel('x0')
    ax.set_ylabel('x1')
    ax.set_zlabel(z_label)
    plt.title(panel_title)
...;

<Figure size 432x288 with 0 Axes>

ReLU 激活函数的变换是一个“对平面进行翻折”的过程。如果考虑下一个神经元的运算（加权求和再加上偏置），可以先看系数 C 的影响：C > 1 时拉伸，折面夹角变小（变陡峭）；0 < C < 1 时收缩，折面夹角变大（变平缓）；C < 0 时翻转，折面向下翻折。

# Top row: ReLU output of each of the three hidden neurons over the grid.
# No cmap is passed to scatter: without a `c` argument matplotlib ignores
# it (and warns).
fig = plt.figure(figsize=(17,10))
for k in range(3):
    ax = fig.add_subplot(2, 3, k + 1, projection="3d")
    ax.scatter(xx, yy, a_grid[:,k].reshape(xx.shape), marker="s", alpha=0.3)
    ax.set_xlabel('x0')
    ax.set_ylabel('x1')
    ax.set_zlabel('Z2')
    plt.title("RelU : Folding plane Z%d" % (k + 1))

# Bottom left: sum of the first two folded planes
ax = fig.add_subplot(2,2,3,projection="3d")
ax.scatter(xx, yy, (a_grid[:,0]+a_grid[:,1]).reshape(xx.shape), marker="s", alpha=0.3)
ax.set_xlabel('x0')
ax.set_ylabel('x1')
ax.set_zlabel('Z2')
plt.title("RelU : Folding plane Z1+Z2")

# Bottom right: sum of all three folded planes plus a hand-picked offset
# of -2.25, drawn together with the plane Z = 0.
ax = fig.add_subplot(2,2,4,projection="3d")
ax.scatter(xx, yy, (a_grid[:,0]+a_grid[:,1]+a_grid[:,2]-2.25).reshape(xx.shape),
           marker="s", alpha=0.3)
ax.scatter(xx, yy, 0, marker="s", alpha=0.3)
ax.set_xlabel('x0')
ax.set_ylabel('x1')
ax.set_zlabel('Z2')
plt.title("RelU : Folding plane Z1+Z2+Z3+b")
...;

多个激活值加权，代表在原有的基础上进行进一步的“翻折”，综合来看结果如上。在三次翻折的结果加上偏置项后，取Z=0平面的投影，就可以得到“六边形”。