快速卷积网络如何实现

车传新 2023-04-06 17:48:14

我使用 NumPy 实现了一个简单的卷积网络，但训练速度比 TensorFlow 慢 10 倍左右。请问还有哪些地方可以优化？

import numpy as np
import tensorflow as tf
import time

def split(A, dim):
    """Extract every kernel-sized window of ``A`` as a flattened patch.

    Parameters
    ----------
    A : numpy.ndarray
        Input batch of shape (samples, width, height, channels).
    dim : dict
        Layer description. ``dim['kernel'][0]`` / ``dim['kernel'][1]`` give
        the window width / height, ``dim['strides'][0]`` /
        ``dim['strides'][1]`` the step along the width / height axis.

    Returns
    -------
    numpy.ndarray
        Array of shape (out_width * out_height, samples,
        kernel_width * kernel_height, channels). Windows are ordered with
        the width position as the slow index and the height position as the
        fast index. When the kernel does not fit into the input, the first
        dimension is 0.
    """
    num_datas = A.shape[0]
    width_A = A.shape[1]
    height_A = A.shape[2]
    channel_A = A.shape[3]
    width_kernel = dim['kernel'][0]
    height_kernel = dim['kernel'][1]
    stride_w = dim['strides'][0]
    stride_h = dim['strides'][1]
    patch_size = width_kernel * height_kernel

    # sliding_window_view rejects windows larger than the array, so the
    # "no window fits" case is answered directly with an empty result.
    if width_A < width_kernel or height_A < height_kernel:
        return np.empty((0, num_datas, patch_size, channel_A), dtype=A.dtype)

    # Strided view, no data copied yet:
    # (samples, width positions, height positions, channels, kernel_w, kernel_h)
    windows = np.lib.stride_tricks.sliding_window_view(
        A, (width_kernel, height_kernel), axis=(1, 2)
    )
    windows = windows[:, ::stride_w, ::stride_h]
    num_windows = windows.shape[1] * windows.shape[2]

    # -> (width positions, height positions, samples, kernel_w, kernel_h, channels)
    patches = windows.transpose(1, 2, 0, 4, 5, 3)
    # A single C-level copy replaces the former Python loop over windows.
    result = patches.reshape(num_windows, num_datas, patch_size, channel_A)
    # In the rare layouts where reshape can return a view, that view is the
    # read-only window view; hand back an independent array in every case.
    if not result.flags.writeable:
        result = result.copy()
    return result

def split_pool(A, dim):
    """Build, for every pooling window, a mask marking its maximal elements.

    Parameters
    ----------
    A : numpy.ndarray
        Input batch of shape (samples, width, height, channels).
    dim : dict
        Layer description. ``dim['kernel'][0]`` / ``dim['kernel'][1]`` give
        the window width / height, ``dim['strides'][0]`` /
        ``dim['strides'][1]`` the step along the width / height axis.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape (out_width * out_height, samples,
        kernel_width, kernel_height, channels). An entry is True where the
        element equals the maximum of its window for that sample and
        channel, so tied maxima are all marked. Windows are ordered with the
        width position as the slow index. When the kernel does not fit into
        the input, the first dimension is 0.
    """
    num_datas = A.shape[0]
    channel_A = A.shape[3]
    width_kernel = dim['kernel'][0]
    height_kernel = dim['kernel'][1]
    stride_w = dim['strides'][0]
    stride_h = dim['strides'][1]

    # sliding_window_view rejects windows larger than the array.
    if A.shape[1] < width_kernel or A.shape[2] < height_kernel:
        return np.zeros(
            (0, num_datas, width_kernel, height_kernel, channel_A), dtype=bool
        )

    # Strided view:
    # (samples, width positions, height positions, channels, kernel_w, kernel_h)
    windows = np.lib.stride_tricks.sliding_window_view(
        A, (width_kernel, height_kernel), axis=(1, 2)
    )
    windows = windows[:, ::stride_w, ::stride_h]
    num_windows = windows.shape[1] * windows.shape[2]

    # Per-window, per-sample, per-channel maximum, kept broadcastable.
    window_max = np.max(windows, axis=(4, 5), keepdims=True)
    mask = windows == window_max

    # -> (width positions, height positions, samples, kernel_w, kernel_h, channels)
    mask = mask.transpose(1, 2, 0, 4, 5, 3)
    return mask.reshape(
        num_windows, num_datas, width_kernel, height_kernel, channel_A
    )

def relu(Z):
    """Apply the rectified linear unit element-wise: negatives become 0."""
    rectified = np.maximum(0, Z)
    return rectified


def softmax(X):
    """Return the softmax of ``X`` taken along axis 0.

    Parameters
    ----------
    X : numpy.ndarray
        Scores; normalisation runs over axis 0, so for a 2-D input every
        column sums to 1.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``X`` holding the normalised exponentials.
    """
    # Softmax is invariant to a constant shift along the normalised axis.
    # Subtracting the maximum keeps every exponent <= 0, so np.exp cannot
    # overflow to inf (which previously produced inf / inf = nan).
    shifted = X - np.max(X, axis=0, keepdims=True)
    X_exp = np.exp(shifted)
    return X_exp / np.sum(X_exp, axis=0)


def loss_cross_entropy(Y_pre, Y_label):
    """Return the cross-entropy summed over all entries, divided by ``Y_pre.shape[1]``.

    Parameters
    ----------
    Y_pre : numpy.ndarray
        Predicted probabilities; axis 1 is used as the divisor of the sum.
    Y_label : numpy.ndarray
        Target values, multiplied element-wise with the log-probabilities.

    Returns
    -------
    The negated sum of ``Y_label * log(Y_pre)`` divided by the size of axis 1.
    """
    batch_size = Y_pre.shape[1]
    # NaN logarithms are replaced by 0; nan_to_num also maps infinities to
    # large finite values, so log(0) does not turn the product into NaN.
    log_probs = np.nan_to_num(np.log(Y_pre), nan=0)
    weighted_total = np.sum(Y_label * log_probs)
    return -weighted_total / batch_size


class Conv2d:
    """Small CNN (convolution / max pooling / fully connected) trained by SGD.

    Parameters
    ----------
    dims : list of dict
        One dict per layer; the first entry describes the input. Keys read:
        ``type`` (0: input, 1: convolution, 2: pooling, 3: fully connected),
        ``kernel`` and ``strides`` for convolution and pooling layers, and
        ``size_out`` for the input layer (width, height, channels) and for
        fully connected layers (number of units). The constructor writes
        ``size_out``, ``weight`` and ``bias`` back into these dicts, and
        ``backward`` updates ``weight`` / ``bias`` in place.
    """

    def __init__(self, dims):
        self.dims = dims
        # Not used by the TensorFlow initialisers below; a NumPy-only
        # alternative would be self.rng.normal(0, .03, shape).
        self.rng = np.random.default_rng(seed=2)
        self.outs = []
        self.lrate = .1
        for dim_bef, dim in zip(dims, dims[1:]):
            if dim['type'] == 1:
                width_bef = dim_bef['size_out'][0]
                height_bef = dim_bef['size_out'][1]
                channel_bef = dim_bef['size_out'][2]
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                channel_out = dim['kernel'][2]
                # NOTE: the output size is computed for stride 1 without
                # padding; dim['strides'] is not taken into account here.
                width_out = width_bef - width_kernel + 1
                height_out = height_bef - height_kernel + 1
                dim['size_out'] = (width_out, height_out, channel_out)
                dim['weight'] = tf.keras.initializers.RandomNormal(0, .03, seed=2)(shape=(width_kernel, height_kernel, channel_bef, channel_out)).numpy()
                dim['bias'] = np.zeros(channel_out)
            elif dim['type'] == 2:
                width_bef = dim_bef['size_out'][0]
                height_bef = dim_bef['size_out'][1]
                channel_bef = dim_bef['size_out'][2]
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                # NOTE: the output size is computed as if the stride equals
                # the kernel size (non-overlapping windows).
                width_out = int((width_bef - width_kernel)/width_kernel) + 1
                height_out = int((height_bef - height_kernel)/height_kernel) + 1
                dim['size_out'] = (width_out, height_out, channel_bef)
            elif dim['type'] == 3:
                size_out = dim['size_out']
                if dim_bef['type'] != 3:
                    # The previous layer is spatial: its output is flattened.
                    width_bef = dim_bef['size_out'][0]
                    height_bef = dim_bef['size_out'][1]
                    channel_bef = dim_bef['size_out'][2]
                    size_in = width_bef * height_bef * channel_bef
                else:
                    size_in = dim_bef['size_out']
                # Stored transposed, i.e. with shape (size_out, size_in).
                dim['weight'] = tf.keras.initializers.RandomNormal(0, .03, seed=2)(shape=(size_in, size_out)).numpy().T
                dim['bias'] = np.zeros(shape=(size_out, 1))

    def forward(self, X, Y):
        """Run the network on ``X`` and compute the loss against ``Y``.

        Parameters
        ----------
        X : numpy.ndarray
            Input batch of shape (samples, width, height, channels).
        Y : numpy.ndarray
            Targets, passed to ``loss_cross_entropy`` together with the
            output of the last layer; axis 1 indexes the samples.

        Returns
        -------
        tuple
            ``(A, loss, cost_time, cost_split)``: the output of the last
            layer, the loss, the elapsed time of the whole pass and the time
            spent inside ``split``, both as whole milliseconds.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments.
        start_time = time.perf_counter() * 1000
        cost_split = 0
        # Per layer (pre-activation, activation), consumed by backward().
        self.outs = [(X, X)]
        A = X
        last_index = len(self.dims) - 2
        for i in range(len(self.dims) - 1):
            dim = self.dims[i + 1]
            dim_bef = self.dims[i]
            layer_type = dim['type']
            num_datas = A.shape[0]
            if layer_type == 1:
                weight = dim['weight']
                bias = dim['bias']
                width_out = dim['size_out'][0]
                height_out = dim['size_out'][1]
                channel_out = dim['size_out'][2]
                start_split = time.perf_counter() * 1000
                # (out_w * out_h, samples, kernel_w * kernel_h, in_channels)
                A_split = split(A, dim)
                cost_split += time.perf_counter() * 1000 - start_split
                A_split = A_split.reshape(-1, A_split.shape[-2] * A_split.shape[-1])
                # (kernel_w * kernel_h * in_channels, out_channels)
                W = weight.reshape(-1, weight.shape[-1])
                # The product is (out_w * out_h * samples, out_channels).
                Z = A_split.dot(W).reshape(width_out, height_out, num_datas, channel_out).transpose(2, 0, 1, 3) + bias
                A = relu(Z)
            elif layer_type == 2:
                width_out = dim['size_out'][0]
                height_out = dim['size_out'][1]
                channel_out = dim['size_out'][2]
                start_split = time.perf_counter() * 1000
                # (out_w * out_h, samples, kernel_w * kernel_h, in_channels)
                A_split = split(A, dim)
                cost_split += time.perf_counter() * 1000 - start_split
                A = np.max(A_split, axis=2).reshape(width_out, height_out, num_datas, channel_out).transpose(2, 0, 1, 3)
                # Pooling has no separate pre-activation. Without this
                # assignment Z would be stale from the previous layer, or
                # unbound when pooling is the first layer.
                Z = A
            elif layer_type == 3:
                weight = dim['weight']
                bias = dim['bias']
                if dim_bef['type'] != 3:
                    # Flatten to (features, samples).
                    A = A.reshape(A.shape[0], A.shape[1] * A.shape[2] * A.shape[3]).T
                Z = weight.dot(A) + bias
                if i == last_index:
                    A = softmax(Z)
                else:
                    A = relu(Z)
            self.outs.append((Z, A))
        loss = loss_cross_entropy(A, Y)
        end_time = time.perf_counter() * 1000
        cost_time = end_time - start_time
        return A, loss, int(cost_time), int(cost_split)

    def backward(self, Y):
        """Back-propagate from the last ``forward`` call and apply one SGD step.

        Weights and biases stored in ``self.dims`` are updated in place
        using ``self.lrate``.

        Parameters
        ----------
        Y : numpy.ndarray
            Targets with the samples along axis 1.

        Returns
        -------
        int
            Elapsed time of the pass in whole milliseconds.
        """
        start_time = time.perf_counter() * 1000
        num_datas = Y.shape[1]
        Z, A = self.outs[-1]
        # Gradient with respect to the pre-activation of the output layer.
        dZ = (A - Y)/Y.shape[1]
        num_layers = len(self.dims) - 1
        for i in range(num_layers):
            dim = self.dims[-1-i]
            dim_bef = self.dims[-2-i]
            Z_bef, A_bef = self.outs[-2 - i]
            Z, A = self.outs[-1-i]
            # The gradient with respect to the network input is never used,
            # so the layer processed last skips computing it.
            need_input_grad = i < num_layers - 1
            if dim['type'] == 3:
                if i > 0:
                    dZ = dA * (Z > 0)
                if A_bef.ndim != 2:
                    A_bef = A_bef.reshape(A_bef.shape[0], -1).T
                dW = dZ.dot(A_bef.T)
                dB = np.sum(dZ, axis=1)[..., np.newaxis]
                if need_input_grad:
                    # Must use the weights from before the update below.
                    dA = dim['weight'].T.dot(dZ)
                dim['weight'] -= self.lrate * dW
                dim['bias'] -= self.lrate * dB
            elif dim['type'] == 1:
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                width_out = dim['size_out'][0]
                height_out = dim['size_out'][1]
                channel_out = dim['size_out'][2]
                width_in = dim_bef['size_out'][0]
                height_in = dim_bef['size_out'][1]
                channel_in = dim_bef['size_out'][2]
                weight = dim['weight']
                if dA.ndim == 2:
                    # Coming from a fully connected layer: restore the
                    # (samples, width, height, channels) layout.
                    dA = dA.T.reshape(num_datas, width_out, height_out, channel_out)
                dZ = dA * (Z > 0)
                dB = np.sum(dZ, axis=(0, 1, 2))
                # (out_w * out_h, samples, kernel_w * kernel_h, in_channels)
                A_split = split(A_bef, dim)
                # (kernel_w * kernel_h * in_channels, out_w * out_h * samples)
                A_split = A_split.reshape(-1, A_split.shape[-2] * A_split.shape[-1]).T
                # (out_w * out_h * samples, out_channels)
                dZ_flatten = dZ.transpose(1, 2, 0, 3).reshape(-1, dZ.shape[-1])
                # (kernel_w, kernel_h, in_channels, out_channels)
                dW = A_split.dot(dZ_flatten).reshape(width_kernel, height_kernel, channel_in, channel_out)
                if need_input_grad:
                    # Input gradient: slide the 180-degree rotated kernel
                    # over dZ padded by kernel - 1 on each side.
                    width_pad = width_kernel - 1
                    height_pad = height_kernel - 1
                    dZ_pad = np.pad(dZ, ((0, 0), (width_pad, width_pad), (height_pad, height_pad), (0, 0)), mode='constant')
                    # (kernel_w * kernel_h * out_channels, in_channels)
                    weight_rot = np.rot90(weight, k=2).transpose(0, 1, 3, 2).reshape(-1, weight.shape[2])
                    # (in_w * in_h, samples, kernel_w * kernel_h, out_channels)
                    dZ_split = split(dZ_pad, dim)
                    dZ_split = dZ_split.reshape(-1, dZ_split.shape[-2] * dZ_split.shape[-1])
                    # The product is (in_w * in_h * samples, in_channels).
                    dA = dZ_split.dot(weight_rot).reshape(width_in, height_in, num_datas, channel_in).transpose(2, 0, 1, 3)
                dim['weight'] -= self.lrate * dW
                dim['bias'] -= self.lrate * dB
            elif dim['type'] == 2:
                if not need_input_grad:
                    # Pooling has no parameters; nothing else to do here.
                    continue
                width_out = dim['size_out'][0]
                height_out = dim['size_out'][1]
                channel_out = dim['size_out'][2]
                width_kernel = dim['kernel'][0]
                height_kernel = dim['kernel'][1]
                width_in = dim_bef['size_out'][0]
                height_in = dim_bef['size_out'][1]
                channel_in = dim_bef['size_out'][2]
                dZ = dA
                if dZ.ndim == 2:
                    dZ = dZ.T.reshape(num_datas, width_out, height_out, channel_out)
                # (out_w * out_h, samples, kernel_w, kernel_h, in_channels)
                A_split = split_pool(A_bef, dim)
                # (samples, out_w, out_h, kernel_w, kernel_h, in_channels)
                A_split = A_split.transpose(1, 0, 2, 3, 4).reshape(num_datas, width_out, height_out, width_kernel, height_kernel, channel_in)
                # Route each output gradient to the positions marked as maxima.
                dA = A_split * dZ[:, :, :, np.newaxis, np.newaxis, :]
                # Interleave window and kernel axes back into image layout;
                # this tiling places the windows side by side.
                dA = dA.transpose(0, 1, 3, 2, 4, 5).reshape(num_datas, width_out*width_kernel, height_out*height_kernel, channel_in)
                # Rows / columns not covered by any window get zero gradient.
                width_pad = width_in - width_out*width_kernel
                height_pad = height_in - height_out*height_kernel
                dA = np.pad(dA, ((0, 0), (0, width_pad), (0, height_pad), (0, 0)), mode='constant')
        end_time = time.perf_counter() * 1000
        cost_time = end_time - start_time
        return int(cost_time)

...全文