• 9月2日目标检测学习笔记——自然场景下文本检测



    前言

    本文为9月2日目标检测学习笔记——自然场景下文本检测,分为三个章节:

    • ICDAR 数据集;
    • EAST 模型;
    • MTCNN 模型。

    一、ICDAR 数据集

    1、数据标注格式

    1

    • 左边:原图;
    • 中间:标注的图像;
    • 右边:ground truth 文件内容,每行先给出文本框各顶点按顺时针顺序排列的坐标,最后一项是对应的文本内容(words);
    • ###:文本内容标注为 ### 的框表示 don’t care,即该区域的文字不作要求(可忽略)。

    二、EAST 模型

    • 使用 FCN 生成多尺度融合的特征图;
    • 支持旋转矩形框;

    网络结构:

    2

    1、参数修改

    • multigpu_train.py:
    # Training options, each registered as a command-line flag through
    # tf.app.flags.DEFINE_<type>(name, default_value, help_text).
    # Almost all help texts are empty, so what an option controls has to be
    # read from how FLAGS.<name> is used in multigpu_train.py.
    tf.app.flags.DEFINE_integer('input_size', 512, '')
    tf.app.flags.DEFINE_integer('batch_size_per_gpu', 2, '')
    tf.app.flags.DEFINE_integer('num_readers', 1, '')
    tf.app.flags.DEFINE_float('learning_rate', 0.0001, '')
    tf.app.flags.DEFINE_integer('max_steps', 100000, '')
    tf.app.flags.DEFINE_float('moving_average_decay', 0.997, '')
    # gpu_list is a string flag -- presumably a comma-separated list of GPU
    # ids; TODO confirm against the code that parses it.
    tf.app.flags.DEFINE_string('gpu_list', '0', '')
    tf.app.flags.DEFINE_string('checkpoint_path', '/tmp/east_resnet_v1_50_rbox/', '')
    # NOTE(review): the help text misspells "restore" as "resotre"; it is a
    # runtime string, so it is left untouched here.
    tf.app.flags.DEFINE_boolean('restore', False, 'whether to resotre from checkpoint')
    tf.app.flags.DEFINE_integer('save_checkpoint_steps', 1000, '')
    tf.app.flags.DEFINE_integer('save_summary_steps', 100, '')
    # Defaults to None, i.e. no pretrained model path is configured.
    tf.app.flags.DEFINE_string('pretrained_model_path', None, '')
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    2、模型测试

    • eval.py:
    # Evaluation options of eval.py, registered as command-line flags through
    # tf.app.flags.DEFINE_<type>(name, default_value, help_text).
    # All paths default to directories relative to the working directory.
    tf.app.flags.DEFINE_string('test_data_path', './test/', '')
    tf.app.flags.DEFINE_string('gpu_list', '0', '')
    tf.app.flags.DEFINE_string('checkpoint_path', './model/', '')
    tf.app.flags.DEFINE_string('output_dir', './out/', '')
    # Boolean switch, off by default; per its help text it disables writing
    # of images.
    tf.app.flags.DEFINE_bool('no_write_images', False, 'do not write images')
    
    • 1
    • 2
    • 3
    • 4
    • 5

    三、MTCNN 模型

    3

    • Stage 1:proposal net;
    • Stage 2:refine net;
    • Stage 3:output net。

    1、LFPW 数据打包

    数据划分:

    • Negative:非人脸;

    • Positive:人脸;

    • Part faces:部分人脸;

    • gen_12net_data.py:

    # Paths used by gen_12net_data.py. All directories are relative to the
    # directory the script is run from.
    # Annotation list file and the directory holding the training images.
    anno_file = "wider_face_train.txt"
    im_dir = "../../DATA/WIDER_train/images"
    # Output directories under ../../DATA/12 -- judging by the names, one per
    # sample category (positive / part / negative); confirm in the script.
    pos_save_dir = "../../DATA/12/positive"
    part_save_dir = "../../DATA/12/part"
    neg_save_dir = '../../DATA/12/negative'
    # Parent directory of the three category directories above.
    save_dir = "../../DATA/12"
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • gen_landmark_aug_12.py:
    # Snippet from gen_landmark_aug_12.py: prepares the output directories and
    # generates the landmark training data for P-Net.
    # All statements are kept at one indentation level so the snippet is
    # valid Python when read on its own.
    dstdir = "../../DATA/12/train_PNet_landmark_aug"
    OUTPUT = '../../DATA/12'
    data_path = '../../DATA'
    # OUTPUT is the parent of dstdir, so it has to be created first.
    if not exists(OUTPUT):
        os.mkdir(OUTPUT)
    if not exists(dstdir):
        os.mkdir(dstdir)
    assert (exists(dstdir) and exists(OUTPUT))
    # train data
    net = "PNet"
    # the file contains the names of all the landmark training data
    train_txt = "trainImageList.txt"
    # NOTE(review): the keyword is spelled `argument` (not `augment`); it must
    # match the signature of GenerateData, which is defined elsewhere.
    imgs, landmarks = GenerateData(train_txt, data_path, net, argument=True)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    2、模型训练

    • P-Net:
    # P-Net training settings (excerpt; the statements below are indented
    # because they are taken from inside an enclosing scope).
        # directory that holds the P-Net training image lists
        base_dir = '../../DATA/imglists/PNet'
        model_name = 'MTCNN'
        # variant without landmarks (disabled):
        #model_path = '../data/%s_model/PNet/PNet' % model_name
        # variant with landmarks (active)
        model_path = '../data/%s_model/PNet_landmark/PNet' % model_name

        # prefix is the checkpoint path handed to train_PNet
        prefix = model_path
        end_epoch = 30
        display = 100
        lr = 0.001
        train_PNet(base_dir, prefix, end_epoch, display, lr)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • R-Net:
        # R-Net training settings (excerpt from inside an enclosing scope).
        # Image lists come from the "imglists_noLM" directory and the model is
        # written under "RNet_No_Landmark".
        base_dir = '../../DATA/imglists_noLM/RNet'

        model_name = 'MTCNN'
        model_path = '../data/%s_model/RNet_No_Landmark/RNet' % model_name
        # prefix is the checkpoint path handed to train_RNet
        prefix = model_path
        end_epoch = 22
        display = 100
        lr = 0.001
        train_RNet(base_dir, prefix, end_epoch, display, lr)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • O-Net:
    # O-Net training settings.
    # All statements are kept at one indentation level so the snippet is
    # valid Python when read on its own.
    base_dir = '../../DATA/imglists/ONet'

    model_name = 'MTCNN'
    model_path = '../data/%s_model/ONet_landmark/ONet' % model_name
    # prefix is the checkpoint path handed to train_ONet
    prefix = model_path
    end_epoch = 22
    # display is 10 here, while the P-Net and R-Net snippets use 100
    display = 10
    lr = 0.001
    train_ONet(base_dir, prefix, end_epoch, display, lr)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    3、模型测试

    #coding:utf-8
    import sys
    sys.path.append('..')
    from Detection.MtcnnDetector import MtcnnDetector
    from Detection.detector import Detector
    from Detection.fcn_detector import FcnDetector
    from Detection.train_models import P_Net, R_Net, O_Net
    from prepare_data.loader import TestLoader
    import cv2
    import os
    import numpy as np
    # ---- test configuration ------------------------------------------------
    # Deepest cascade stage to load: "PNet", "RNet" or "ONet".
    test_mode = "PNet"
    # Three thresholds -- presumably one per cascade stage; confirm in
    # MtcnnDetector.
    thresh = [0.6, 0.7, 0.7]
    min_face_size = 20
    stride = 2
    slide_window = False
    shuffle = False
    # Slots for the P-Net / R-Net / O-Net detectors; unused stages stay None.
    detectors = [None, None, None]
    prefix = [
        '../data/MTCNN_model/PNet_No_landmark/PNet',
        '../data/MTCNN_model/RNet_landmark/RNet',
        '../data/MTCNN_model/ONet_landmark/ONet',
    ]
    epoch = [30, 14, 16]
    batch_size = [2048, 64, 16]
    # Checkpoint name per stage: "<prefix>-<epoch>".
    model_path = ['%s-%s' % (stem, ep) for stem, ep in zip(prefix, epoch)]

    # ---- stage 1: P-Net (always loaded) ------------------------------------
    if not slide_window:
        PNet = FcnDetector(P_Net, model_path[0])
    else:
        PNet = Detector(P_Net, 12, batch_size[0], model_path[0])
    detectors[0] = PNet

    # ---- stage 2: R-Net (needed for test_mode "RNet" and "ONet") -----------
    if test_mode == "RNet" or test_mode == "ONet":
        RNet = Detector(R_Net, 24, batch_size[1], model_path[1])
        detectors[1] = RNet

    # ---- stage 3: O-Net (only for test_mode "ONet") ------------------------
    if test_mode == "ONet":
        ONet = Detector(O_Net, 48, batch_size[2], model_path[2])
        detectors[2] = ONet

    mtcnn_detector = MtcnnDetector(
        detectors=detectors,
        min_face_size=min_face_size,
        stride=stride,
        threshold=thresh,
        slide_window=slide_window,
    )
    # Every entry of the test directory becomes one image path, in the order
    # os.listdir returns them. (To test a single picture, put just its path
    # into gt_imdb instead.)
    path = "../../DATA/test/lfpw_testImage"
    gt_imdb = [os.path.join(path, entry) for entry in os.listdir(path)]
    test_data = TestLoader(gt_imdb)

    # Run the detector over the whole test set; the results are indexed in
    # the same order as gt_imdb by the drawing code that follows.
    all_boxes, landmarks = mtcnn_detector.detect_face(test_data)
    
    # Draw the detections of every test image and show them one at a time.
    # enumerate keeps the image index in step with all_boxes without a
    # hand-maintained counter.
    for idx, imagepath in enumerate(gt_imdb):
        print(imagepath)
        image = cv2.imread(imagepath)
        if image is None:
            # cv2.imread returns None (it does not raise) when the file cannot
            # be read as an image; drawing on None would crash.
            print("skip unreadable image: %s" % imagepath)
            continue
        for bbox in all_boxes[idx]:
            # bbox[0..3] are used as the two corner points of the rectangle,
            # bbox[4] is printed (rounded to 2 decimals) at the first corner.
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))
            cv2.putText(image, str(np.round(bbox[4], 2)), top_left,
                        cv2.FONT_HERSHEY_TRIPLEX, 1, color=(255, 0, 255))
            cv2.rectangle(image, top_left, bottom_right, (0, 0, 255))

        # Optional (disabled): draw the landmarks as well. Each landmark is
        # read as a flat sequence x0, y0, x1, y1, ...
        # for landmark in landmarks[idx]:
        #     for i in range(len(landmark) // 2):
        #         cv2.circle(image, (int(landmark[2 * i]), int(landmark[2 * i + 1])), 3, (0, 0, 255))

        # Optional (disabled): save the result instead of only showing it.
        # cv2.imwrite("result_landmark/%d.png" % (idx + 1), image)
        cv2.imshow("lala", image)
        # Block until a key is pressed before moving on to the next image.
        cv2.waitKey(0)

    # Alternative (disabled): draw the boxes of the first image directly on
    # the items yielded by test_data.
    # for data in test_data:
    #     for bbox in all_boxes[0]:
    #         cv2.rectangle(data, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255))
    #     cv2.imshow("lala", data)
    #     cv2.waitKey(0)
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84

  • 相关阅读:
    构建直接序列扩频系统模型(Matlab代码实现)
    SpringBoot集成Spring Data JPA项目实操
    动手学深度学习_目标检测
    Java基于springboot+vue的房屋出租租房系统 前后端分离
    机器学习笔记 - 时间序列的趋势分量
    Flink1.15源码解析--启动TaskManager
    使用cmd登录阿里云服务器
    四川大学计算机考研资料汇总
    Java泛型
    javaee之黑马乐优商城5
  • 原文地址:https://blog.csdn.net/Ashen_0nee/article/details/126654493