# Module 1: resize the dataset
import os
import cv2
def resize_img(DATADIR, data_k, img_size):
    """Resize every ``.jpg`` image of one folder and save the resized copies.

    Images are read from ``os.path.join(DATADIR, data_k)``.  The resized
    copies keep their file names and are written to a folder whose path is
    that source path with ``_new/`` appended.  The output folder is created
    only once there is an image to write into it.

    Files that OpenCV cannot decode are reported and skipped instead of
    aborting the whole run.

    Args:
        DATADIR: Root directory of the dataset.
        data_k: Name of the sub-folder of ``DATADIR`` to process.
        img_size: Target size in pixels as ``[width, height]``.
    """
    w, h = img_size[0], img_size[1]
    path = os.path.join(DATADIR, data_k)
    # Destination folder for the resized images.
    save_path = path + '_new/'

    for i in os.listdir(path):
        if not i.endswith('.jpg'):
            continue

        img_array = cv2.imread(os.path.join(path, i), cv2.IMREAD_COLOR)
        if img_array is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; passing None on to cv2.resize would crash.
            print('skip unreadable image: ' + i)
            continue

        new_array = cv2.resize(img_array, (w, h), interpolation=cv2.INTER_CUBIC)

        # exist_ok avoids the separate "exists?" check and its race window.
        os.makedirs(save_path, exist_ok=True)
        print(i)
        cv2.imwrite(save_path + i, new_array)
if __name__ == '__main__':
    # New size every image is resized to, as [width, height].
    img_size = [224, 224]
    # Sub-folder of DATADIR whose images are processed.
    # NOTE(review): this is a single space, so the folder that gets read is
    # literally named ' ' inside DATADIR -- confirm that this is intended.
    data_k = ' '
    # Root folder of the image dataset.
    DATADIR = "F:/Github/cat_dataset/cat_12_test"
    resize_img(DATADIR=DATADIR, data_k=data_k, img_size=img_size)
# Module 2: sort the whole dataset into class folders using the .txt list file
import numpy as np
import os
import shutil
# Sort the dataset into folders.  Every non-blank line of the list file holds
# two whitespace-separated fields: the path of a file, and the folder that
# file is moved into (the folder is created on demand).
filename = 'train_list.txt'  # relative path: looked up in the current working directory

with open(filename, 'r') as file_to_read:
    for line_no, line in enumerate(file_to_read, start=1):
        # split() without arguments splits on any whitespace; pass ',' here
        # instead if the list file is comma-separated.
        fields = line.split()
        if not fields:
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no entry; unpacking them would raise.
            continue
        if len(fields) != 2:
            raise ValueError(
                '{}:{}: expected "<file> <folder>", got {!r}'.format(filename, line_no, line)
            )
        src_path, target_dir = fields
        os.makedirs(target_dir, exist_ok=True)
        shutil.move(src_path, target_dir)
# Module 3: model
from tensorflow.keras import layers, models, Model, Sequential
def GoogLeNet(im_height=224, im_width=224, class_num=12, aux_logits=False):
    """Assemble the GoogLeNet classifier as a functional Keras model.

    Args:
        im_height: Height of the input images in pixels.
        im_width: Width of the input images in pixels.
        class_num: Number of classes predicted by every classifier head.
        aux_logits: When true, two auxiliary classifiers are attached (after
            ``inception_4a`` and ``inception_4d``) and the model returns
            ``[aux1, aux2, main]``; otherwise it returns only the main
            softmax output.

    Returns:
        The ``models.Model`` mapping an image batch to the output(s) above.

    The shape comments below assume the default 224x224 input.
    """
    def downsample(layer_name):
        # Every max-pool between stages uses the same 3x3 / stride-2 setup.
        return layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name=layer_name)

    # TensorFlow tensors are channels-last (NHWC).
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")  # (None, 224, 224, 3)

    # --- stem ---------------------------------------------------------------
    net = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME",
                        activation="relu", name="conv2d_1")(input_image)  # (None, 112, 112, 64)
    net = downsample("maxpool_1")(net)                                     # (None, 56, 56, 64)
    net = layers.Conv2D(64, kernel_size=1, activation="relu", name="conv2d_2")(net)  # (None, 56, 56, 64)
    net = layers.Conv2D(192, kernel_size=3, padding="SAME",
                        activation="relu", name="conv2d_3")(net)          # (None, 56, 56, 192)
    net = downsample("maxpool_2")(net)                                     # (None, 28, 28, 192)

    # --- stage 3 ------------------------------------------------------------
    net = Inception(64, 96, 128, 16, 32, 32, name="inception_3a")(net)     # (None, 28, 28, 256)
    net = Inception(128, 128, 192, 32, 96, 64, name="inception_3b")(net)   # (None, 28, 28, 480)
    net = downsample("maxpool_3")(net)                                     # (None, 14, 14, 480)

    # --- stage 4 ------------------------------------------------------------
    net = Inception(192, 96, 208, 16, 48, 64, name="inception_4a")(net)    # (None, 14, 14, 512)
    if aux_logits:
        # First auxiliary classifier, fed by the output of inception_4a.
        aux1 = InceptionAux(class_num, name="aux_1")(net)

    stage4_middle = (
        ("inception_4b", (160, 112, 224, 24, 64, 64)),  # (None, 14, 14, 512)
        ("inception_4c", (128, 128, 256, 24, 64, 64)),  # (None, 14, 14, 512)
        ("inception_4d", (112, 144, 288, 32, 64, 64)),  # (None, 14, 14, 528)
    )
    for block_name, channels in stage4_middle:
        net = Inception(*channels, name=block_name)(net)
    if aux_logits:
        # Second auxiliary classifier, fed by the output of inception_4d.
        aux2 = InceptionAux(class_num, name="aux_2")(net)

    net = Inception(256, 160, 320, 32, 128, 128, name="inception_4e")(net)  # (None, 14, 14, 832)
    net = downsample("maxpool_4")(net)                                      # (None, 7, 7, 832)

    # --- stage 5 ------------------------------------------------------------
    net = Inception(256, 160, 320, 32, 128, 128, name="inception_5a")(net)  # (None, 7, 7, 832)
    net = Inception(384, 192, 384, 48, 128, 128, name="inception_5b")(net)  # (None, 7, 7, 1024)

    # --- main classifier ----------------------------------------------------
    net = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(net)   # (None, 1, 1, 1024)
    net = layers.Flatten(name="output_flatten")(net)                        # (None, 1024)
    net = layers.Dropout(rate=0.4, name="output_dropout")(net)
    net = layers.Dense(class_num, name="output_dense")(net)                 # (None, class_num)
    main_output = layers.Softmax(name="aux_3")(net)

    # The two auxiliary heads come first, the main output last.
    outputs = [aux1, aux2, main_output] if aux_logits else main_output
    return models.Model(inputs=input_image, outputs=outputs)
class Inception(layers.Layer):
    """Inception block: four parallel branches concatenated along the channels.

    Every branch keeps the spatial size of its input (stride 1, ``SAME``
    padding where the kernel is larger than 1x1), which is what makes the
    concatenation possible.

    Args:
        ch1x1: Filters of the plain 1x1 convolution branch.
        ch3x3red: Filters of the 1x1 reduction in front of the 3x3 convolution.
        ch3x3: Filters of the 3x3 convolution.
        ch5x5red: Filters of the 1x1 reduction in front of the 5x5 convolution.
        ch5x5: Filters of the 5x5 convolution.
        pool_proj: Filters of the 1x1 convolution that follows the max-pool.
        **kwargs: Forwarded to ``layers.Layer`` (used to pass the layer name).
    """

    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super().__init__(**kwargs)

        # Branch 1: 1x1 convolution.
        self.branch1 = layers.Conv2D(ch1x1, kernel_size=1, activation="relu")

        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        reduce_3x3 = layers.Conv2D(ch3x3red, kernel_size=1, activation="relu")
        conv_3x3 = layers.Conv2D(ch3x3, kernel_size=3, padding="SAME", activation="relu")
        self.branch2 = Sequential([reduce_3x3, conv_3x3])

        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        reduce_5x5 = layers.Conv2D(ch5x5red, kernel_size=1, activation="relu")
        conv_5x5 = layers.Conv2D(ch5x5, kernel_size=5, padding="SAME", activation="relu")
        self.branch3 = Sequential([reduce_5x5, conv_5x5])

        # Branch 4: 3x3 max-pool followed by a 1x1 projection.  strides is set
        # explicitly because MaxPool2D otherwise defaults to strides == pool_size.
        pool_3x3 = layers.MaxPool2D(pool_size=3, strides=1, padding="SAME")
        project = layers.Conv2D(pool_proj, kernel_size=1, activation="relu")
        self.branch4 = Sequential([pool_3x3, project])

    def call(self, inputs, **kwargs):
        """Apply the four branches to ``inputs`` and concatenate their outputs."""
        towers = (self.branch1, self.branch2, self.branch3, self.branch4)
        return layers.concatenate([tower(inputs) for tower in towers])
# Auxiliary classifier structure
class InceptionAux(layers.Layer):
    """Auxiliary classifier head attached to an intermediate feature map.

    Pipeline: 5x5/3 average pool -> 1x1 conv (128) -> flatten -> dropout ->
    dense (1024) -> dropout -> dense (``num_classes``) -> softmax.

    All sub-layers are created once in ``__init__`` rather than on every
    ``call``, and the ``training`` flag is handed to the dropout layers
    explicitly so that dropout is only active while training.

    Args:
        num_classes: Number of classes of the output distribution.
        **kwargs: Forwarded to ``layers.Layer`` (used to pass the layer name).
    """

    def __init__(self, num_classes, **kwargs):
        super(InceptionAux, self).__init__(**kwargs)
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, activation="relu")
        self.flatten = layers.Flatten()
        self.dropout1 = layers.Dropout(rate=0.5)
        self.fc1 = layers.Dense(1024, activation="relu")
        self.dropout2 = layers.Dropout(rate=0.5)
        self.fc2 = layers.Dense(num_classes)
        # Turns the scores into a probability distribution.
        self.softmax = layers.Softmax()

    def call(self, inputs, training=None, **kwargs):
        """Return class probabilities for a batch of feature maps.

        Shape comments are channels-last and assume the 14x14 feature maps
        that GoogLeNet feeds in for a 224x224 input (512 channels for the
        first head, 528 for the second).

        Args:
            inputs: Batch of feature maps.
            training: Whether the layer runs in training mode; forwarded to
                the dropout layers.  ``None`` leaves the decision to Keras.
        """
        x = self.averagePool(inputs)             # (N, 4, 4, C)
        x = self.conv(x)                         # (N, 4, 4, 128)
        x = self.flatten(x)                      # (N, 2048)
        x = self.dropout1(x, training=training)
        x = self.fc1(x)                          # (N, 1024)
        x = self.dropout2(x, training=training)
        x = self.fc2(x)                          # (N, num_classes)
        return self.softmax(x)
# Module 4: training
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from model import GoogLeNet
import tensorflow as tf
import json
import os
# Training hyper-parameters.
im_height, im_width = 224, 224
batch_size = 20
epochs = 50

# The dataset is expected in "cat_dataset/" two directory levels above the
# current working directory, with "train" and "val" sub-folders.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
image_path = "{}/cat_dataset/".format(data_root)  # cat data set path
train_dir = "{}train".format(image_path)
validation_dir = "{}val".format(image_path)

# Directory the trained weights are saved into.
weights_dir_missing = not os.path.exists("save_weights")
if weights_dir_missing:
    os.makedirs("save_weights")
def pre_function(img):
    """Normalise pixel values: divide by 255, then map linearly so 0 -> -1 and 1 -> 1.

    For inputs in the range [0, 255] the result therefore lies in [-1, 1].
    """
    return (img / 255. - 0.5) * 2.0
# Data generators.  Only the training generator augments (random horizontal
# flips); both apply the same pixel normalisation.
# The generators yield a single label per image, whereas the model built
# below has three outputs, so Model.fit cannot be used as-is; the training
# code goes through the low-level Keras API instead.
train_image_generator = ImageDataGenerator(preprocessing_function=pre_function,
                                           horizontal_flip=True)
validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                           batch_size=batch_size,
                                                           shuffle=True,
                                                           target_size=(im_height, im_width),
                                                           class_mode='categorical')
total_train = train_data_gen.n

# class_indices maps class name -> index; invert it to index -> class name
# and store it as JSON.
class_indices = train_data_gen.class_indices
inverse_dict = {val: key for key, val in class_indices.items()}
json_str = json.dumps(inverse_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

# Validation data must come from the generator WITHOUT augmentation.
# (Previously this used train_image_generator, so validation images were
# randomly flipped and validation_image_generator was never used.)
val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                              batch_size=batch_size,
                                                              shuffle=True,
                                                              target_size=(im_height, im_width),
                                                              class_mode='categorical')
total_val = val_data_gen.n

# aux_logits=True: the model returns [aux1, aux2, main output].
model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=12, aux_logits=True)
# model.build((batch_size, 224, 224, 3))  # when using subclass model
model.summary()

# Objects used by the low-level training loop.
# from_logits=False because the loss is applied to the model's softmax outputs.
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')
@tf.function
def train_step(images, labels):
    """Run one optimisation step on a batch and update the training metrics.

    The total loss is the main classifier's loss plus the losses of the two
    auxiliary classifiers, each weighted by 0.3 (the weighting the author
    attributes to the original GoogLeNet paper).
    """
    with tf.GradientTape() as tape:
        # The model returns the two auxiliary predictions first, the main one last.
        aux1_pred, aux2_pred, main_pred = model(images, training=True)
        aux_loss = loss_object(labels, aux1_pred) * 0.3 + loss_object(labels, aux2_pred) * 0.3
        loss = aux_loss + loss_object(labels, main_pred)

    # Back-propagate and update every trainable variable.
    weights = model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(loss, weights), weights))

    # Accuracy is tracked on the main output only.
    train_loss(loss)
    train_accuracy(labels, main_pred)
@tf.function
def test_step(images, labels):
    """Evaluate one batch and update the validation metrics.

    Only the main output is scored; the two auxiliary outputs are ignored.
    """
    _aux1_pred, _aux2_pred, main_pred = model(images, training=False)
    test_loss(loss_object(labels, main_pred))
    test_accuracy(labels, main_pred)
# Training loop: one pass of training batches and one of validation batches
# per epoch.  Weights are saved whenever the validation loss reaches a new
# minimum.
best_test_loss = float('inf')
for epoch in range(1, epochs+1):
    # Clear the values accumulated during the previous epoch.
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_states()

    # Only full batches are counted: a trailing partial batch is not added.
    for step in range(total_train // batch_size):
        images, labels = next(train_data_gen)
        train_step(images, labels)

    for step in range(total_val // batch_size):
        test_images, test_labels = next(val_data_gen)
        test_step(test_images, test_labels)

    epoch_test_loss = test_loss.result()
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    report = template.format(epoch,
                             train_loss.result(),
                             train_accuracy.result() * 100,
                             epoch_test_loss,
                             test_accuracy.result() * 100)
    print(report)

    if epoch_test_loss < best_test_loss:
        best_test_loss = epoch_test_loss
        model.save_weights("./save_weights/myGoogLeNet.h5")
# Module 5: prediction
from model import GoogLeNet
from PIL import Image, ImageFont, ImageDraw
import numpy as np
import json
import matplotlib.pyplot as plt
# Classify a single image with the trained GoogLeNet and show the image with
# the predicted class name drawn onto it.
im_height = 224
im_width = 224

# Load the image once.  Converting to RGB guarantees three channels, which
# both the model input (H, W, 3) and the RGB text colour used below require;
# grayscale, RGBA or palette images would otherwise fail.
img0 = Image.open("../1.jpg").convert("RGB")

# resize image to 224x224 (resize returns a new image, img0 stays untouched)
img = img0.resize((im_width, im_height))

# scale pixel values to [0, 1], then normalise to [-1, 1]
img = ((np.array(img) / 255.) - 0.5) / 0.5

# Add the image to a batch where it's the only member.
img = (np.expand_dims(img, 0))

# Read the index -> class name mapping; without it the prediction cannot be
# labelled, so report the problem and stop with exit status -1.
try:
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except (OSError, ValueError) as e:  # missing/unreadable file or invalid JSON
    print(e)
    raise SystemExit(-1)

# Inference model without the auxiliary classifiers.
model = GoogLeNet(class_num=12, aux_logits=False)
model.summary()
# The file name must match the one written by the training script exactly
# ("myGoogLeNet.h5"); the previous spelling "myGoogLenet.h5" is not found on
# case-sensitive file systems.  by_name=True matches weights to layers by
# layer name, so the saved weights of the auxiliary heads are simply ignored.
model.load_weights("./save_weights/myGoogLeNet.h5", by_name=True)  # h5 format
# model.load_weights("./save_weights/myGoogLeNet.ckpt")  # ckpt format

result = model.predict(img)
predict_class = np.argmax(result)

# Draw the predicted class name onto the original-size image and display it.
ttf = ImageFont.load_default()
draw = ImageDraw.Draw(img0)  # drawing context bound to img0
draw.text((10,10),str(class_indict[str(predict_class)]),font=ttf, fill=(155,215,155))
img0.show()