60行代码徒手实现深度神经网络

准备数据集

# 获取数据集
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset bundled with scikit-learn. The original cell
# read `breast` without ever assigning it, which raised a NameError.
breast = datasets.load_breast_cancer()

# Rescale every feature column with MinMaxScaler before training.
# NOTE(review): the scaler is fit on the full dataset before the split, so
# test-set statistics leak into the scaling; kept as-is to preserve the
# article's reported scores.
scaler = preprocessing.MinMaxScaler()
data = scaler.fit_transform(breast['data'])
target = breast['target']

# Split into train/test parts using train_test_split's default proportions.
# No random_state is set, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(data, target)

NN实现代码

import numpy as np
import pandas as pd
# Activation functions and their derivatives.
def ReLu(z):
    """Return the element-wise maximum of 0.0 and ``z``."""
    return np.maximum(0.0, z)


def d_ReLu(z):
    """Return 0 where ``z < 0`` and 1 everywhere else (including ``z == 0``)."""
    is_negative = z < 0
    return np.where(is_negative, 0, 1)


def LeakyReLu(z):
    """Return the element-wise maximum of ``0.01 * z`` and ``z``."""
    scaled = 0.01 * z
    return np.maximum(scaled, z)
d_LeakyReLu = lambda z:np.where(z=0.5] = 1
Y_test[Y_prob< 0.5] = 0
return(Y_test)

单隐层神经网络

# 采用Sigmoid激活函数
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'Sigmoid')
NN.fit(X_train,y_train)
# 绘制目标函数迭代曲线
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 测试在验证集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))

# 采用 Tanh激活函数
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'Tanh')
NN.fit(X_train,y_train)
# 绘制目标函数迭代曲线
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 测试在验证集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))

# 采用 ReLu激活函数
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'ReLu')
NN.fit(X_train,y_train)
# 绘制目标函数迭代曲线
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 测试在验证集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))

# 采用 LeakyReLu激活函数
NN = NNClassifier(n = [np.nan,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'LeakyReLu')
NN.fit(X_train,y_train)
# 绘制目标函数迭代曲线
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 测试在验证集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))

双隐层神经网络

# 设置两个隐藏层，采用ReLu激活函数
NN = NNClassifier(n = [np.nan,5,5,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'ReLu')
NN.fit(X_train,y_train)
# 绘制目标函数迭代曲线
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 测试在验证集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))

AUC得分0.99874，比采用单隐藏层时的最优得分0.99958有所降低，可能是模型复杂度过高导致了过拟合。下面尝试将每个隐藏层的节点个数减少至3，以降低模型复杂度。

# 双隐藏层，隐藏层节点数为3
NN = NNClassifier(n = [np.nan,3,3,1],alpha = 0.02,
ITERNUM = 200000, gfunc = 'ReLu')
NN.fit(X_train,y_train)
# 绘制目标函数迭代曲线
%matplotlib inline
NN.dfJ.plot(figsize = (12,8))
# 测试在验证集的auc得分
from sklearn.metrics import roc_auc_score
Y_prob = NN.predict_prob(X_test)
roc_auc_score(list(y_test),list(Y_prob))

AUC得分0.99979，又有所提高。

# Baseline for comparison: scikit-learn's MLPClassifier.
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier

# Two hidden layers with 3 neurons each.
MLPClf = MLPClassifier(
    hidden_layer_sizes=(3, 3),
    max_iter=200000,
    activation='relu',
)
MLPClf.fit(X_train, y_train)

# Plot the loss curve recorded by the fitted model, one value per row in
# column 'J'.
dfJ = pd.DataFrame(np.array(MLPClf.loss_curve_), columns=['J'])
dfJ.plot(figsize=(12, 8))

# ROC AUC on the held-out X_test / y_test split, using column 1 of the
# predicted probabilities.
Y_prob = MLPClf.predict_proba(X_test)[:, 1]
roc_auc_score(list(y_test), list(Y_prob))