Data initialization
Read the raw MNIST IDX files, binarize the pixels (stroke = 1, background = 0), and cache the arrays as .npy files for the training and testing scripts below.
import numpy as np
# Training images: skip the 16-byte IDX header, then 60000 images of 784 pixels each
with open('./minist_data/train-images.idx3-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
train_data = loaded[16:60000 * 784 + 16].reshape((60000, 784))
# Training labels: skip the 8-byte IDX header, then 60000 labels
with open('./minist_data/train-labels.idx1-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
train_labels = loaded[8:60000 + 8].reshape(60000)
# Binarize: handwriting strokes become 1, background stays 0
train_data[train_data > 0] = 1
# Test images
with open('./minist_data/t10k-images.idx3-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
test_data = loaded[16:10000 * 784 + 16].reshape((10000, 784))
# Test labels
with open('./minist_data/t10k-labels.idx1-ubyte', 'rb') as f:
    loaded = np.fromfile(file=f, dtype=np.uint8)
test_labels = loaded[8:10000 + 8].reshape(10000)
# Binarize: handwriting strokes become 1, background stays 0
test_data[test_data > 0] = 1
np.save("./minist_data/train_data.npy", train_data)
np.save("./minist_data/train_labels.npy", train_labels)
np.save("./minist_data/test_data.npy", test_data)
np.save("./minist_data/test_labels.npy", test_labels)
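Before moving on to training, it can be worth a quick sanity check that the binarization and the .npy caching worked. The short sketch below is not part of the original pipeline; it only reloads the saved arrays and renders the first training digit as a 28x28 text grid.
import numpy as np

# Reload the cached arrays and check their shapes
train_data = np.load("./minist_data/train_data.npy")
train_labels = np.load("./minist_data/train_labels.npy")
print(train_data.shape, train_labels.shape)  # (60000, 784) (60000,)

# Render the first image as a 28x28 grid of '.'/'#' so the digit shape is visible
img = train_data[0].reshape(28, 28)
for row in img:
    print(''.join('#' if px else '.' for px in row))
print('label:', train_labels[0])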
Training
Fit a 784x10 weight matrix by linear least squares, using one one-hot output column per digit, then estimate a bias vector from the mean residual.
import numpy as np
# Model parameters: a 784x10 weight matrix and a length-10 bias vector
parameter = np.zeros((784, 10), dtype=np.float32)
bias = np.zeros(10, dtype=np.float32)
# One-hot encoding of the digits 0-9 (row i is the target vector for digit i)
numbers = np.eye(10, dtype=np.float32)
# Number of iterations
the_times = 1
# Number of training samples per iteration
train_numbers = 60000
train_data = np.load("./minist_data/train_data.npy")
train_labels = np.load("./minist_data/train_labels.npy")
# Solve the least-squares problem for the weight matrix
for times in range(the_times):
    random_data = np.empty((train_numbers, 784), dtype=np.float32)
    random_labels = np.empty((train_numbers, 10), dtype=np.float32)
    for m in range(train_numbers):
        # ran = np.random.randint(0, 60000)  # optionally sample at random instead
        ran = m
        random_data[m] = train_data[ran]
        random_labels[m] = numbers[train_labels[ran]]
    ATB = np.matmul(random_data.T, random_labels)
    ATA = np.matmul(random_data.T, random_data)
    # Plain inverse fails: A^T A is singular because the border pixels are always 0
    # ATA_1 = np.linalg.inv(ATA)
    # Pseudo-inverse
    ATA_1 = np.linalg.pinv(ATA)
    # Average the solutions over the iterations
    parameter += np.matmul(ATA_1, ATB) * (1 / the_times)
    print(times)
# Solve for the bias: the mean gap between the one-hot targets and the raw predictions
# (target minus prediction, so that adding the bias at test time corrects the offset)
output = np.matmul(train_data, parameter)
for m in range(60000):
    bias += numbers[train_labels[m]] - output[m]
bias /= 60000
np.save("./minist_data/parameter.npy", parameter)
np.save("./minist_data/bias.npy", bias)
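For reference, the loop above computes the closed-form least-squares solution W = pinv(AᵀA) @ (AᵀB), where A is the 60000x784 matrix of binarized images and B is the 60000x10 matrix of one-hot targets. A minimal alternative sketch, assuming the same cached .npy files, is to let np.linalg.lstsq minimize ||AW - B||² directly; it handles the rank-deficient pixel columns internally and should give essentially the same weights.
import numpy as np

# Least-squares fit in one call: minimizes ||A @ W - B||^2 over W
A = np.load("./minist_data/train_data.npy").astype(np.float32)
labels = np.load("./minist_data/train_labels.npy")
B = np.eye(10, dtype=np.float32)[labels]   # 60000 x 10 one-hot targets

W, residuals, rank, sv = np.linalg.lstsq(A, B, rcond=None)
print(W.shape, rank)                       # (784, 10) and the numerical rank of A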
Testing
Score each test image with the learned weights and bias, and take the arg-max of the 10 outputs as the predicted digit.
import numpy as np
test_data = np.load("./minist_data/test_data.npy")
test_labels = np.load("./minist_data/test_labels.npy")
parameter = np.load("./minist_data/parameter.npy")
bias = np.load("./minist_data/bias.npy")
# Number of test samples
test_number = 10000
right_sum = 0  # total correct
worse_sum = 0  # total wrong
right = np.zeros(10, dtype=np.int16)  # correct count per digit
worse = np.zeros(10, dtype=np.int16)  # error count per digit
# Raw scores: one row of 10 values per test image, shifted by the bias
output = np.matmul(test_data, parameter)
output = output + bias
for m in range(test_number):
    # The predicted digit is the index of the largest score
    if test_labels[m] == np.argmax(output[m]):
        right_sum += 1
        right[test_labels[m]] += 1
    else:
        worse_sum += 1
        worse[test_labels[m]] += 1
    # Print a few raw score vectors as a spot check
    if m % 1000 == 0:
        print(test_labels[m], output[m])
# Correct predictions: total, then per digit
print(right_sum, right)
# Wrong predictions: total, then per digit
print(worse_sum, worse)
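The raw counters are easier to read as percentages. A small follow-up sketch, reusing the right, worse, right_sum and test_number variables from the script above:
# Per-digit totals and accuracy percentages
total = right + worse
print('overall accuracy: %.2f%%' % (100.0 * right_sum / test_number))
for d in range(10):
    print('digit %d: %d/%d correct (%.2f%%)' % (d, right[d], total[d], 100.0 * right[d] / total[d]))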