44,502
社区成员
发帖
与我相关
我的任务
分享
使用 NumPy 实现了一个简单的卷积网络,但是训练速度比 TensorFlow 慢 10 倍左右,请问还有哪里可以优化吗?
import numpy as np
import tensorflow as tf
import time
"""
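Split the input into N kernel-sized tensors, one per window position.
Parameters
----------
A: input data, shape (samples, width, height, channels)
dim: dimension info of the current convolution layer
Return
------
(out_width * out_height, samples, kernel_width * kernel_height, input_channels)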
"""
def split(A, dim):
    """Cut the input into kernel-sized patches, one per window position.

    Parameters
    ----------
    A : input data, shape (samples, width, height, channels)
    dim : layer description; ``dim['kernel'][0]`` / ``dim['kernel'][1]`` give
        the window width / height and ``dim['strides']`` the (width, height)
        step between consecutive windows.

    Return
    ------
    Array of shape
    (out_width * out_height, samples, kernel_width * kernel_height, channels).
    Windows are enumerated with the width index as the outer loop and the
    height index as the inner loop.
    """
    batch, in_w, in_h, channels = (A.shape[axis] for axis in range(4))
    k_w, k_h = dim['kernel'][0], dim['kernel'][1]
    step_w, step_h = dim['strides'][0], dim['strides'][1]
    patches = [
        # Flatten each (k_w, k_h) window so it can be fed to one matmul.
        A[:, x:x + k_w, y:y + k_h, :].reshape(batch, k_w * k_h, channels)
        for x in range(0, in_w - k_w + 1, step_w)
        for y in range(0, in_h - k_h + 1, step_h)
    ]
    return np.array(patches)
def split_pool(A, dim):
    """Build, for every pooling window, a boolean mask of its maxima.

    Parameters
    ----------
    A : input data, shape (samples, width, height, channels)
    dim : layer description; ``dim['kernel'][0]`` / ``dim['kernel'][1]`` give
        the window width / height and ``dim['strides']`` the (width, height)
        step between consecutive windows.

    Return
    ------
    Boolean array of shape
    (out_width * out_height, samples, kernel_width, kernel_height, channels).
    An entry is True where the input equals the maximum of its window (taken
    per sample and per channel); tied maxima are all marked True.
    """
    in_w, in_h = A.shape[1], A.shape[2]
    k_w, k_h = dim['kernel'][0], dim['kernel'][1]
    step_w, step_h = dim['strides'][0], dim['strides'][1]
    masks = []
    for x in range(0, in_w - k_w + 1, step_w):
        for y in range(0, in_h - k_h + 1, step_h):
            window = A[:, x:x + k_w, y:y + k_h, :]
            # keepdims lets the per-window maximum broadcast back over
            # the two spatial axes of the window.
            peak = np.max(window, axis=(1, 2), keepdims=True)
            masks.append(window == peak)
    return np.array(masks)
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified
def softmax(X):
    """Softmax along axis 0 (each column is normalised to sum to 1).

    Parameters
    ----------
    X : array of logits; axis 0 is the axis being normalised.

    Return
    ------
    Array of the same shape as ``X`` holding ``exp(X) / sum(exp(X), axis=0)``.
    """
    X = np.asarray(X)
    if X.size:
        # Softmax is invariant to a per-column shift; subtracting the column
        # maximum keeps exp() from overflowing to inf (inf / inf == nan).
        X = X - np.max(X, axis=0, keepdims=True)
    X_exp = np.exp(X)
    return X_exp / np.sum(X_exp, axis=0, keepdims=True)
def loss_cross_entropy(Y_pre, Y_label):
    """Mean cross-entropy between predicted probabilities and labels.

    Parameters
    ----------
    Y_pre : predicted probabilities; the loss is averaged over axis 1
        (``Y_pre.shape[1]``).
    Y_label : target array with the same shape as ``Y_pre``.

    Return
    ------
    ``-sum(Y_label * log(Y_pre)) / Y_pre.shape[1]`` as a scalar.
    """
    # Clamp before the log: log(0) is -inf, which nan_to_num would turn into
    # about -1.8e308 and blow the reported loss up (and emit a RuntimeWarning).
    eps = 1e-12
    log_prob = np.log(np.maximum(Y_pre, eps))
    # NaN predictions still contribute zero, as before.
    log_prob = np.nan_to_num(log_prob, nan=0)
    return -np.sum(Y_label * log_prob) / Y_pre.shape[1]
class Conv2d:
    """Small sequential CNN (convolution / max-pool / dense) trained by SGD.

    Parameters
    ----------
    dims : list of dict, one per layer; ``dims[0]`` describes the input.
        Keys read by this class:

        * ``type``: 0 input, 1 convolution, 2 pooling, 3 fully connected.
        * ``kernel``: convolution ``(width, height, out_channels)``;
          pooling ``(width, height)``.
        * ``strides``: ``(width, height)`` step, read by ``split`` /
          ``split_pool``.
        * ``size_out``: must be given for the input layer as
          ``(width, height, channels)`` and for a fully connected layer as
          the number of units; it is computed here for convolution and
          pooling layers.

        The dicts are modified in place: ``size_out``, ``weight`` and
        ``bias`` are written into them.

    Notes
    -----
    * Convolution output sizes are computed as ``in - kernel + 1`` and
      pooling output sizes as if the stride equalled the kernel size, so
      other stride settings do not match what ``split`` produces.
    * Convolution and hidden dense layers use ReLU; the last layer uses
      softmax when it is fully connected.
    """

    def __init__(self, dims):
        self.dims = dims
        self.rng = np.random.default_rng(seed=2)
        self.outs = []
        self.lrate = .1
        for dim_bef, dim in zip(dims, dims[1:]):
            kind = dim['type']
            if kind == 1:
                size_bef = dim_bef['size_out']
                channel_bef = size_bef[2]
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                channel_out = dim['kernel'][2]
                dim['size_out'] = (
                    size_bef[0] - width_kernel + 1,
                    size_bef[1] - height_kernel + 1,
                    channel_out,
                )
                # TensorFlow's initializer is used on purpose here; the
                # NumPy alternative would be
                # self.rng.normal(0, .03, (kw, kh, channel_bef, channel_out)).
                dim['weight'] = tf.keras.initializers.RandomNormal(0, .03, seed=2)(
                    shape=(width_kernel, height_kernel, channel_bef, channel_out)
                ).numpy()
                dim['bias'] = np.zeros(channel_out)
            elif kind == 2:
                size_bef = dim_bef['size_out']
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                width_out = int((size_bef[0] - width_kernel) / width_kernel) + 1
                height_out = int((size_bef[1] - height_kernel) / height_kernel) + 1
                dim['size_out'] = (width_out, height_out, size_bef[2])
            elif kind == 3:
                size_out = dim['size_out']
                if dim_bef['type'] != 3:
                    # Coming from a spatial layer: the input is flattened.
                    size_bef = dim_bef['size_out']
                    size_in = size_bef[0] * size_bef[1] * size_bef[2]
                else:
                    size_in = dim_bef['size_out']
                # Stored transposed, i.e. as (size_out, size_in); the NumPy
                # alternative would be self.rng.normal(0, .03, (size_out, size_in)).
                dim['weight'] = tf.keras.initializers.RandomNormal(0, .03, seed=2)(
                    shape=(size_in, size_out)
                ).numpy().T
                dim['bias'] = np.zeros(shape=(size_out, 1))

    def forward(self, X, Y):
        """Run the network on a batch and cache every layer's output.

        Parameters
        ----------
        X : input batch, shape (samples, width, height, channels).
        Y : labels passed to ``loss_cross_entropy`` together with the output
            of the last layer.

        Return
        ------
        ``(A, loss, cost_time, cost_split)``: the last layer's output, the
        loss, the total elapsed time and the time spent inside ``split``
        (both in whole milliseconds).
        """
        start_time = time.perf_counter() * 1000
        cost_split = 0
        self.outs = [(X, X)]
        A = X
        last_index = len(self.dims) - 2
        for i, (dim_bef, dim) in enumerate(zip(self.dims, self.dims[1:])):
            kind = dim['type']
            num_datas = A.shape[0]
            if kind == 1:
                weight = dim['weight']
                bias = dim['bias']
                width_out, height_out, channel_out = dim['size_out']
                start_split = time.perf_counter() * 1000
                # (out_w * out_h, samples, kernel_w * kernel_h, in_channels)
                A_split = split(A, dim)
                cost_split += time.perf_counter() * 1000 - start_split
                A_split = A_split.reshape(-1, A_split.shape[-2] * A_split.shape[-1])
                # (kernel_w * kernel_h * in_channels, out_channels)
                W = weight.reshape(-1, weight.shape[-1])
                # (out_w * out_h * samples, out_channels) -> (samples, out_w, out_h, out_channels)
                Z = A_split.dot(W).reshape(
                    width_out, height_out, num_datas, channel_out
                ).transpose(2, 0, 1, 3) + bias
                A = relu(Z)
            elif kind == 2:
                width_out, height_out, channel_out = dim['size_out']
                start_split = time.perf_counter() * 1000
                # (out_w * out_h, samples, kernel_w * kernel_h, in_channels)
                A_split = split(A, dim)
                cost_split += time.perf_counter() * 1000 - start_split
                A = np.max(A_split, axis=2).reshape(
                    width_out, height_out, num_datas, channel_out
                ).transpose(2, 0, 1, 3)
                # Pooling has no pre-activation; store its own output so the
                # cache never holds a stale (or undefined) Z for this layer.
                Z = A
            elif kind == 3:
                weight = dim['weight']
                bias = dim['bias']
                if dim_bef['type'] != 3:
                    # Flatten to (features, samples) for the dense layers.
                    A = A.reshape(A.shape[0], A.shape[1] * A.shape[2] * A.shape[3]).T
                Z = weight.dot(A) + bias
                A = softmax(Z) if i == last_index else relu(Z)
            self.outs.append((Z, A))
        loss = loss_cross_entropy(A, Y)
        cost_time = time.perf_counter() * 1000 - start_time
        return A, loss, int(cost_time), int(cost_split)

    def backward(self, Y):
        """Back-propagate from the cached forward pass and apply one SGD step.

        Must be called after ``forward``; it reads ``self.outs`` and updates
        every layer's ``weight`` / ``bias`` in place with ``self.lrate``.

        Parameters
        ----------
        Y : labels, shape (classes, samples); ``Y.shape[1]`` is used as the
            batch size.

        Return
        ------
        Elapsed time in whole milliseconds.
        """
        start_time = time.perf_counter() * 1000
        num_datas = Y.shape[1]
        Z, A = self.outs[-1]
        # Gradient of the mean loss w.r.t. the last layer's pre-activation.
        dZ = (A - Y) / Y.shape[1]
        num_layers = len(self.dims) - 1
        for i in range(num_layers):
            dim = self.dims[-1 - i]
            dim_bef = self.dims[-2 - i]
            A_bef = self.outs[-2 - i][1]
            Z = self.outs[-1 - i][0]
            # The layer fed by the raw input does not need d(loss)/d(input):
            # nothing consumes it, and for a convolution it is the most
            # expensive part of the step (pad + split + matmul).
            needs_input_grad = i < num_layers - 1
            kind = dim['type']
            if kind == 3:
                if i > 0:
                    dZ = dA * (Z > 0)
                if A_bef.ndim != 2:
                    A_bef = A_bef.reshape(A_bef.shape[0], -1).T
                dW = dZ.dot(A_bef.T)
                dB = np.sum(dZ, axis=1)[..., np.newaxis]
                if needs_input_grad:
                    # Uses the weights from before this step's update.
                    dA = dim['weight'].T.dot(dZ)
                dim['weight'] -= self.lrate * dW
                dim['bias'] -= self.lrate * dB
            elif kind == 1:
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                width_out, height_out, channel_out = dim['size_out']
                width_in = dim_bef['size_out'][0]
                height_in = dim_bef['size_out'][1]
                channel_in = dim_bef['size_out'][2]
                weight = dim['weight']
                if dA.ndim == 2:
                    # Gradient arrives flattened from a dense layer.
                    dA = dA.T.reshape(num_datas, width_out, height_out, channel_out)
                dZ = dA * (Z > 0)
                dB = np.sum(dZ, axis=(0, 1, 2))
                # (out_w * out_h, samples, kernel_w * kernel_h, in_channels)
                A_split = split(A_bef, dim)
                # (kernel_w * kernel_h * in_channels, out_w * out_h * samples)
                A_split = A_split.reshape(-1, A_split.shape[-2] * A_split.shape[-1]).T
                # (out_w * out_h * samples, out_channels)
                dZ_flatten = dZ.transpose(1, 2, 0, 3).reshape(-1, dZ.shape[-1])
                # (kernel_w, kernel_h, in_channels, out_channels)
                dW = A_split.dot(dZ_flatten).reshape(
                    width_kernel, height_kernel, channel_in, channel_out
                )
                if needs_input_grad:
                    # Input gradient: convolve the zero-padded dZ with the
                    # kernel rotated by 180 degrees.
                    width_pad = width_kernel - 1
                    height_pad = height_kernel - 1
                    dZ_pad = np.pad(
                        dZ,
                        ((0, 0), (width_pad, width_pad), (height_pad, height_pad), (0, 0)),
                        mode='constant',
                    )
                    # (kernel_w * kernel_h * out_channels, in_channels)
                    weight_rot = np.rot90(weight, k=2).transpose(0, 1, 3, 2).reshape(
                        -1, weight.shape[2]
                    )
                    # (in_w * in_h, samples, kernel_w * kernel_h, out_channels)
                    dZ_split = split(dZ_pad, dim)
                    dZ_split = dZ_split.reshape(-1, dZ_split.shape[-2] * dZ_split.shape[-1])
                    # (in_w * in_h * samples, in_channels) -> (samples, in_w, in_h, in_channels)
                    dA = dZ_split.dot(weight_rot).reshape(
                        width_in, height_in, num_datas, channel_in
                    ).transpose(2, 0, 1, 3)
                dim['weight'] -= self.lrate * dW
                dim['bias'] -= self.lrate * dB
            elif kind == 2 and needs_input_grad:
                width_out, height_out, channel_out = dim['size_out']
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                width_in = dim_bef['size_out'][0]
                height_in = dim_bef['size_out'][1]
                channel_in = dim_bef['size_out'][2]
                dZ = dA
                if dZ.ndim == 2:
                    # Gradient arrives flattened from a dense layer.
                    dZ = dZ.T.reshape(num_datas, width_out, height_out, channel_out)
                # (out_w * out_h, samples, kernel_w, kernel_h, in_channels)
                A_split = split_pool(A_bef, dim)
                # (samples, out_w, out_h, kernel_w, kernel_h, in_channels)
                A_split = A_split.transpose(1, 0, 2, 3, 4).reshape(
                    num_datas, width_out, height_out, width_kernel, height_kernel, channel_in
                )
                # Route each output gradient to the position(s) of its maximum.
                dA = A_split * dZ[:, :, :, np.newaxis, np.newaxis, :]
                dA = dA.transpose(0, 1, 3, 2, 4, 5).reshape(
                    num_datas, width_out * width_kernel, height_out * height_kernel, channel_in
                )
                # Rows/columns not covered by any window get zero gradient.
                width_pad = width_in - width_out * width_kernel
                height_pad = height_in - height_out * height_kernel
                dA = np.pad(
                    dA, ((0, 0), (0, width_pad), (0, height_pad), (0, 0)), mode='constant'
                )
        cost_time = time.perf_counter() * 1000 - start_time
        return int(cost_time)