• 六、项目实战---识别猫和狗


    一、准备数据集

    kagglecatsanddogs网上一搜一大堆,这里我就不上传了,需要的话可以私信
    在这里插入图片描述
    导包

    import os
    import zipfile
    import random
    import shutil
    import tensorflow as tf
    from tensorflow.keras.optimizers import RMSprop
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from shutil import copyfile
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8

    猫和狗的照片各12500张

    print(len(os.listdir('./temp/cats/')))
    print(len(os.listdir('./temp/dogs/')))
    """
    12500
    12500
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6

    生成训练数据文件夹和测试数据文件夹

    import os
    import zipfile
    import random
    import shutil
    import tensorflow as tf
    from tensorflow.keras.optimizers import RMSprop
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from shutil import copyfile
    
    
    def create_dir(file_dir):
        if os.path.exists(file_dir):
            print("True")
            shutil.rmtree(file_dir)#删除再创建
            os.makedirs(file_dir)
        else:
            os.makedirs(file_dir)
    
    
    cat_source_dir = "./temp/cats/"
    train_cats_dir = "./temp/train/cats/"
    test_cats_dir = "./temp/test/cats/"
    
    dot_source_dir = "./temp/dogs/"
    train_dogs_dir = "./temp/train/dogs/"
    test_dogs_dir = "./temp/test/dogs/"
    
    
    create_dir(train_cats_dir)#创建猫的训练集文件夹
    create_dir(test_cats_dir)#创建猫的测试集文件夹
    create_dir(train_dogs_dir)#创建狗的训练集文件夹
    create_dir(test_dogs_dir)#创建狗的测试集文件夹
    
    """
    True
    True
    True
    True
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39

    在这里插入图片描述
    将总的猫狗图像按9:1分成训练集和测试集,猫和狗各12500张
    最终temp/train/catstemp/train/dogs两个文件夹下各12500 * 0.9=11250张
    temp/test/catstemp/test/dogs这两个文件夹下各12500 * 0.1=1250张
    cats和dogs为总共的猫狗图像
    test和train为准备的数据集文件

    import os
    import zipfile
    import random
    import shutil
    import tensorflow as tf
    from tensorflow.keras.optimizers import RMSprop
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from shutil import copyfile
    
    
    def split_data(source,train,test,split_size):
        files = []
        for filename in os.listdir(source):
            file = source + filename
            if os.path.getsize(file)>0:
                files.append(filename)
            else:
                print(filename + "is zero file,please ignoring")
                
            train_length = int(len(files)*split_size)
            test_length = int(len(files)-train_length)
            shuffled_set = random.sample(files,len(files))
            train_set = shuffled_set[0:train_length]
            test_set = shuffled_set[-test_length:]
            
            for filename in train_set:
                this_file = source + filename
                destination = train + filename
                copyfile(this_file,destination)
                
            for filename in test_set:
                this_file = source + filename
                destination = test + filename
                copyfile(this_file,destination)
    
    
    cat_source_dir = "./temp/cats/"
    train_cats_dir = "./temp/train/cats/"
    test_cats_dir = "./temp/test/cats/"
    
    dot_source_dir = "./temp/dogs/"
    train_dogs_dir = "./temp/train/dogs/"
    test_dogs_dir = "./temp/test/dogs/"
    
    
    split_size = 0.9
    split_data(cat_source_dir,train_cats_dir,test_cats_dir,split_size)
    split_data(dog_source_dir,train_dogs_dir,test_dogs_dir,split_size)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48

    二、模型的搭建和训练

    先对数据进行归一化操作,预处理进行优化一下

    import os
    import zipfile
    import random
    import shutil
    import tensorflow as tf
    from tensorflow.keras.optimizers import RMSprop
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from shutil import copyfile
    
    
    train_dir = "./temp/train/"
    train_datagen = ImageDataGenerator(rescale=1.0/255.0)#优化网络,先进行归一化操作
    train_generator = train_datagen.flow_from_directory(train_dir,batch_size=100,class_mode='binary',target_size=(150,150))#二分类,训练样本的输入的要一致
    
    validation_dir = "./temp/test/"
    validation_datagen = ImageDataGenerator(rescale=1.0/255.0)
    validation_generator = validation_datagen.flow_from_directory(validation_dir,batch_size=100,class_mode='binary',target_size=(150,150))
    """
    Found 22500 images belonging to 2 classes.
    Found 2500 images belonging to 2 classes.
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21

    搭建模型架构

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(16,(3,3),activation='relu',input_shape=(150,150,3)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(32,(3,3),activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64,(3,3),activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
    
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512,activation='relu'),
        tf.keras.layers.Dense(1,activation='sigmoid')
    ])
    model.compile(optimizer=RMSprop(lr=0.001),loss='binary_crossentropy',metrics=['acc'])
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    训练模型
    225:因为数据一共22500张,猫和狗各12500张,其对于训练集个11250张,故训练集共22500张,在预处理第一段代码中,batch_size=100设置了一批100个,故总共应该有225批
    epochs=2:两轮,也就是所有的样本全部训练一次
    每轮包含225批,每一批有100张样本

    history = model.fit_generator(train_generator,
                                 epochs=2,#进行2轮训练,每轮255批
                                 verbose=1,#要不记录每次训练的日志,1表示记录
                                 validation_data=validation_generator)
                                 
    """
    Instructions for updating:
    Use tf.cast instead.
    Epoch 1/2
    131/225 [================>.............] - ETA: 2:03 - loss: 0.7204 - acc: 0.6093
    """
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    history是模型运行过程的结果

    三、分析训练结果

    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt
    
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    
    epochs = range(len(acc))
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    epoch太少了,导致是直线,多训练几轮实际应该是折线图
    准确率

    plt.plot(epochs,acc,'r',"training accuracy")
    plt.plot(epochs,val_acc,'b',"validation accuracy")
    plt.title("training and validation accuracy")
    plt.figure()
    
    • 1
    • 2
    • 3
    • 4

    在这里插入图片描述
    损失值

    plt.plot(epochs,loss,'r',"training loss")
    plt.plot(epochs,val_loss,'b',"validation loss")
    plt.figure()
    
    • 1
    • 2
    • 3

    在这里插入图片描述

    四、模型的使用验证

    import numpy as np
    from google.colab import files
    from tensorflow.keras.preprocessing import image
    
    uploaded = files.upload()
    for fn in uploaded.keys():
        path = 'G:/Juptyer_workspace/Tensorflow_mooc/sjj/test/' + fn#该路径为要用模型测试的路径
        img = image.load_img(path,target_size=(150,150))
        x = image.img_to_array(img)#多维数组
        x = np.expand_dims(x,axis=0)#拉伸
        
        images = np.vstack([x])#水平方向拉直
        classes = model.predict(images,batch_size=10)
        print(classes[0])
        if classes[0]>0.5:
            print(fn + "it is a dog")
        else:
            print(fn + "it is a cat")
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
  • 相关阅读:
    进化:元宇宙明天的主题
    golang实现前缀数实现匹配路由和获取路由参数
    题目:2729.判断一个数是否迷人
    项目部署服务器--浏览器拒绝访问问题
    ubuntu安装debian包的命令dpkg和apt的详解
    《智能风控实践指南》笔记(二)
    乙方策划人员的内心独白:写不完的案子,是工作的常态吗?
    工业数字化与新一代数字化系统设计平台----讲座
    Java单测Mock升级实践
    CPU缓存一致性
  • 原文地址:https://blog.csdn.net/qq_41264055/article/details/125460998