Initializing the data
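The raw MNIST IDX files begin with a small header, 16 bytes for the image files and 8 bytes for the label files, which is why the code below slices the raw bytes from offsets 16 and 8 respectively before reshaping.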
import numpy as np

# Initialize the training set (files must be opened in binary mode for np.fromfile)
with open('./minist_data/train-images.idx3-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
train_data = loaded[16:60000 * 784 + 16].reshape((60000, 784))
with open('./minist_data/train-labels.idx1-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
train_labels = loaded[8:60000 + 8].reshape(60000)
# Binarize: handwriting pixels become 1, background stays 0
train_data = (train_data > 0).astype(np.uint8)
# Initialize the test set
with open('./minist_data/t10k-images.idx3-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
test_data = loaded[16:10000 * 784 + 16].reshape((10000, 784))
# Test-set labels
with open('./minist_data/t10k-labels.idx1-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
test_labels = loaded[8:10000 + 8].reshape(10000)
# Binarize: handwriting pixels become 1, background stays 0
test_data = (test_data > 0).astype(np.uint8)
np.save("./minist_data/train_data.npy", train_data)
np.save("./minist_data/train_labels.npy", train_labels)
np.save("./minist_data/test_data.npy", test_data)
np.save("./minist_data/test_labels.npy", test_labels)
# Initialize the biases
hidden_bias0 = np.random.randn(256)
hidden_bias1 = np.random.randn(100)
hidden_bias2 = np.random.randn(64)
output_bias = np.random.randn(10)
np.save("./minist_data/hidden_bias0.npy", hidden_bias0)
np.save("./minist_data/hidden_bias1.npy", hidden_bias1)
np.save("./minist_data/hidden_bias2.npy", hidden_bias2)
np.save("./minist_data/output_bias.npy", output_bias)
# Initialize the weights with scaled Gaussians
hidden_parameter0 = np.random.randn(784, 256) * (1.0 / np.sqrt(256))
hidden_parameter1 = np.random.randn(256, 100) * (1.0 / np.sqrt(100))
hidden_parameter2 = np.random.randn(100, 64) * (1.0 / np.sqrt(64))
output_parameter = np.random.randn(64, 10) * (1.0 / np.sqrt(10))
np.save("./minist_data/hidden_parameter0.npy", hidden_parameter0)
np.save("./minist_data/hidden_parameter1.npy", hidden_parameter1)
np.save("./minist_data/hidden_parameter2.npy", hidden_parameter2)
np.save("./minist_data/output_parameter.npy", output_parameter)
print("初始化完成")
Training
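The loop below is plain mini-batch stochastic gradient descent on the squared error L = (1/2m) Σ‖a − y‖², with sigmoid activations everywhere. The output layer's delta is (a − y) multiplied elementwise by the sigmoid derivative a(1 − a); each earlier layer's delta is the next layer's delta times the transposed weight matrix, again multiplied elementwise by a(1 − a). A layer's weight gradient is its input activations transposed times its delta, and its bias gradient is the delta summed over the batch.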
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# Load the training data and the saved parameters
train_data = np.load("./minist_data/train_data.npy")
train_labels = np.load("./minist_data/train_labels.npy")
hidden_parameter0 = np.load('./minist_data/hidden_parameter0.npy')
hidden_parameter1 = np.load('./minist_data/hidden_parameter1.npy')
hidden_parameter2 = np.load('./minist_data/hidden_parameter2.npy')
output_parameter = np.load('./minist_data/output_parameter.npy')
hidden_bias0 = np.load('./minist_data/hidden_bias0.npy')
hidden_bias1 = np.load('./minist_data/hidden_bias1.npy')
hidden_bias2 = np.load('./minist_data/hidden_bias2.npy')
output_bias = np.load('./minist_data/output_bias.npy')
# One-hot encodings of the ten digit classes
numbers = np.eye(10, dtype=np.float32)
# Mini-batch size for stochastic gradient descent
random_number = 10
# Learning rate
learning_rate = 0.001
# Number of stochastic-gradient-descent steps
for times in range(60000):
    # Draw a random mini-batch of examples and one-hot labels
    the_number = np.random.randint(0, 60000, size=random_number)
    random_train_data = train_data[the_number].astype(np.float32)
    random_train_labels = numbers[train_labels[the_number]]
    # Forward pass through the three hidden layers and the output layer
    hidden_output0 = sigmoid(np.matmul(random_train_data, hidden_parameter0) + hidden_bias0)
    hidden_output0_derivative = hidden_output0 * (1 - hidden_output0)
    hidden_output1 = sigmoid(np.matmul(hidden_output0, hidden_parameter1) + hidden_bias1)
    hidden_output1_derivative = hidden_output1 * (1 - hidden_output1)
    hidden_output2 = sigmoid(np.matmul(hidden_output1, hidden_parameter2) + hidden_bias2)
    hidden_output2_derivative = hidden_output2 * (1 - hidden_output2)
    output_output = sigmoid(np.matmul(hidden_output2, output_parameter) + output_bias)
    output_output_derivative = output_output * (1 - output_output)
    # Backward pass: each layer's delta, then the weight gradients as
    # (input activations)^T @ delta, summed over the mini-batch
    error = output_output - random_train_labels
    J_output_output_derivative = error * output_output_derivative
    J_output_parameter_derivative = np.matmul(hidden_output2.T, J_output_output_derivative)
    J_hidden2_output_derivative = np.matmul(J_output_output_derivative, output_parameter.T) * hidden_output2_derivative
    J_hidden2_parameter_derivative = np.matmul(hidden_output1.T, J_hidden2_output_derivative)
    J_hidden1_output_derivative = np.matmul(J_hidden2_output_derivative, hidden_parameter2.T) * hidden_output1_derivative
    J_hidden1_parameter_derivative = np.matmul(hidden_output0.T, J_hidden1_output_derivative)
    J_hidden0_output_derivative = np.matmul(J_hidden1_output_derivative, hidden_parameter1.T) * hidden_output0_derivative
    J_hidden0_parameter_derivative = np.matmul(random_train_data.T, J_hidden0_output_derivative)
    # Gradient step, averaged over the mini-batch; the bias gradients are
    # the deltas summed over the batch
    output_parameter -= (learning_rate / random_number) * J_output_parameter_derivative
    hidden_parameter2 -= (learning_rate / random_number) * J_hidden2_parameter_derivative
    hidden_parameter1 -= (learning_rate / random_number) * J_hidden1_parameter_derivative
    hidden_parameter0 -= (learning_rate / random_number) * J_hidden0_parameter_derivative
    output_bias -= (learning_rate / random_number) * J_output_output_derivative.sum(axis=0)
    hidden_bias2 -= (learning_rate / random_number) * J_hidden2_output_derivative.sum(axis=0)
    hidden_bias1 -= (learning_rate / random_number) * J_hidden1_output_derivative.sum(axis=0)
    hidden_bias0 -= (learning_rate / random_number) * J_hidden0_output_derivative.sum(axis=0)
    # Checkpoint every 100 steps
    if times % 100 == 0:
        np.save("./minist_data/hidden_parameter0.npy", hidden_parameter0)
        np.save("./minist_data/hidden_parameter1.npy", hidden_parameter1)
        np.save("./minist_data/hidden_parameter2.npy", hidden_parameter2)
        np.save("./minist_data/output_parameter.npy", output_parameter)
        np.save("./minist_data/hidden_bias0.npy", hidden_bias0)
        np.save("./minist_data/hidden_bias1.npy", hidden_bias1)
        np.save("./minist_data/hidden_bias2.npy", hidden_bias2)
        np.save("./minist_data/output_bias.npy", output_bias)
        print(times)
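If you want to convince yourself that these delta formulas are correct, a finite-difference gradient check on a tiny one-layer version of the same setup is a quick test. Everything in this sketch (the 4-input, 2-output layer, the random data, and names like grad_analytic) is made up for the check; it compares the analytic gradient against central differences.

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 4))   # 5 samples, 4 inputs
Y = rng.random((5, 2))            # 5 targets, 2 outputs
W = rng.standard_normal((4, 2))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def loss(W):
    A = sigmoid(np.matmul(X, W))
    return 0.5 * np.sum((A - Y) ** 2) / X.shape[0]

# Analytic gradient, using the same chain rule as the training loop above
A = sigmoid(np.matmul(X, W))
delta = (A - Y) * A * (1 - A)
grad_analytic = np.matmul(X.T, delta) / X.shape[0]

# Numeric gradient by central differences
eps = 1e-6
grad_numeric = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        W[i, j] += eps
        up = loss(W)
        W[i, j] -= 2 * eps
        down = loss(W)
        W[i, j] += eps
        grad_numeric[i, j] = (up - down) / (2 * eps)

# The two should agree to roughly 1e-8 or better
print(np.max(np.abs(grad_analytic - grad_numeric)))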
Testing
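The test script pushes all 10,000 test images through the trained network in a single batched forward pass, then tallies hits and misses per digit.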
import numpy as np
import time

start = time.time()

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    # subtract the maximum before exponentiating for numerical stability
    e_x = np.exp(x - x.max())
    return e_x / e_x.sum()
# Number of test examples
test_number = 10000
test_data = np.load("./minist_data/test_data.npy")
test_labels = np.load("./minist_data/test_labels.npy")
# Trained weights and biases
hidden_parameter0 = np.load('./minist_data/hidden_parameter0.npy')
hidden_parameter1 = np.load('./minist_data/hidden_parameter1.npy')
hidden_parameter2 = np.load('./minist_data/hidden_parameter2.npy')
output_parameter = np.load('./minist_data/output_parameter.npy')
hidden_bias0 = np.load('./minist_data/hidden_bias0.npy')
hidden_bias1 = np.load('./minist_data/hidden_bias1.npy')
hidden_bias2 = np.load('./minist_data/hidden_bias2.npy')
output_bias = np.load('./minist_data/output_bias.npy')
# One batched forward pass over the whole test set
hidden_output0 = sigmoid(np.matmul(test_data, hidden_parameter0) + hidden_bias0)
hidden_output1 = sigmoid(np.matmul(hidden_output0, hidden_parameter1) + hidden_bias1)
hidden_output2 = sigmoid(np.matmul(hidden_output1, hidden_parameter2) + hidden_bias2)
output_output = sigmoid(np.matmul(hidden_output2, output_parameter) + output_bias)
right = np.zeros(10, dtype=np.int16)
worse = np.zeros(10, dtype=np.int16)
right_sum = 0
worse_sum = 0
# Tally correct and incorrect predictions per digit
for m in range(test_number):
    if test_labels[m] == np.argmax(output_output[m]):
        right_sum += 1
        right[test_labels[m]] += 1
    else:
        worse_sum += 1
        worse[test_labels[m]] += 1
    if m % 1000 == 0:
        # scale by 7 before softmax to sharpen the printed probabilities
        print(test_labels[m], softmax(output_output[m] * 7))
print("识别正确数:", right_sum, right)
print("识别失败数:", worse_sum, worse)
print(test_number, "例用时:", time.time() - start)
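The right/worse counters say how often each true digit was hit or missed, but not what it was mistaken for. A minimal follow-up sketch, assuming the checkpoint files written during training exist (the loop over weights/biases and the names predictions and confusion are this sketch's own), builds the full 10x10 confusion matrix:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

test_data = np.load("./minist_data/test_data.npy")
test_labels = np.load("./minist_data/test_labels.npy")
weights = [np.load("./minist_data/%s.npy" % name) for name in
           ("hidden_parameter0", "hidden_parameter1", "hidden_parameter2", "output_parameter")]
biases = [np.load("./minist_data/%s.npy" % name) for name in
          ("hidden_bias0", "hidden_bias1", "hidden_bias2", "output_bias")]

a = test_data
for w, b in zip(weights, biases):
    a = sigmoid(np.matmul(a, w) + b)
predictions = np.argmax(a, axis=1)

# confusion[i][j] counts test images with true label i predicted as j;
# the diagonal holds the correct predictions
confusion = np.zeros((10, 10), dtype=np.int32)
np.add.at(confusion, (test_labels, predictions), 1)
print(confusion)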