• Keras速查_CPU和GPU的mnist预测训练_模型导出_模型导入再预测_导出onnx并预测


    需要做点什么

    方便广大研究生(“烟酒生”)、算法工程师(“人工智障炼丹师”)快速上手 Keras,特写此文。本文默认读者已具备深度学习和数据集的基本概念。

    系统环境

    python 3.7.4
    tensorflow 2.6.0
    keras 2.6.0
    onnx 1.9.0
    onnxruntime-gpu 1.9.0
    tf2onnx 1.9.3

    数据准备

    MNIST数据集csv文件是一个42000x785的矩阵
    42000表示有42000张图片
    785中第一列是图片的类别(0,1,2,..,9),第二列到最后一列是图片数据向量 (28x28的图片张成784的向量), 数据集长这个样子:

    1 0 0 0 0 0 0 0 0 0 ..
    0 0 0 0 0 0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0
    4 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0
    7 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    5 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    8 0 0 0 0 0 0 0 0 0
    9 0 0 0 0 0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0
    2 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0

    1. 导入需要的包

    import logging
    import os
    import subprocess
    import sys

    import onnx
    import keras
    import numpy as np
    import pandas as pd
    import tensorflow as tf
    import onnxruntime as ort
    from sklearn.metrics import accuracy_score
    from keras.models import Sequential, Model, load_model, save_model
    from keras.layers import Dense, Activation, Dropout, Conv2D, Flatten, MaxPool2D, Input, Conv1D
    from keras.utils.np_utils import to_categorical
    
    # Keep the console output readable: let only ERROR-level records through
    # the "tensorflow" logger and set AutoGraph verbosity to 0.
    logging.getLogger("tensorflow").setLevel(logging.ERROR)
    tf.autograph.set_verbosity(0)
    

    2. 参数准备

    # --- Training hyper-parameters -------------------------------------------
    N_EPOCH = 1        # number of passes over the training split
    N_BATCH = 64       # mini-batch size
    N_BATCH_NUM = 500  # N_BATCH * N_BATCH_NUM rows form the training split

    # --- File locations ------------------------------------------------------
    S_DATA_PATH = "mnist_train.csv"
    S_KERAS_MODEL_DIR_PATH = "cnn_keras"
    S_KERAS_MODEL_PATH = "cnn_keras.h5"
    S_ONNX_MODEL_PATH = "cnn_keras.onnx"

    # --- Device selection ----------------------------------------------------
    # Default: run on the first GPU.
    S_DEVICE = "cuda"
    N_DEVICE_ID = 0
    S_DEVICE_FULL = "cuda:0"
    # Without a GPU, replace the three assignments above with:
    # S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", 0, "cpu"

    # In CPU mode, hide every CUDA device from the process.
    if S_DEVICE == "cpu":
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    

    3. 读取数据

    # Read the header-less CSV: column 0 is the class label, the remaining
    # columns are the flattened pixel values of one image per row.
    df = pd.read_csv(S_DATA_PATH, header=None)
    np_mat = np.array(df)
    for table in (df, np_mat):
        print(table.shape)

    # Labels stay as read; pixels are scaled from [0, 255] to [0, 1] as float32.
    Y = np_mat[:, 0]
    X = np_mat[:, 1:].astype(np.float32) / 255

    # The first N_BATCH * N_BATCH_NUM rows are used for training, the rest for testing.
    n_train = N_BATCH * N_BATCH_NUM
    X_train, X_test = X[:n_train], X[n_train:]
    Y_train, Y_test = Y[:n_train], Y[n_train:]

    # Turn each 784-long row back into a 28 x 28 x 1 image and one-hot encode
    # the labels into 10 classes.
    X_train = X_train.reshape((len(X_train), 28, 28, 1))
    X_test = X_test.reshape((len(X_test), 28, 28, 1))
    Y_train = to_categorical(Y_train, num_classes=10)
    Y_test = to_categorical(Y_test, num_classes=10)
    for arr in (X_train, Y_train, X_test, Y_test):
        print(arr.shape)
    

    运行输出

    (42000, 785)
    (42000, 785)
    (32000, 28, 28, 1)
    (32000, 10)
    (10000, 28, 28, 1)
    (10000, 10)
    

    4. 模型构建

    # Input is a channels-last image tensor: 28 x 28 pixels, 1 channel.
    x_in = Input(shape=(28, 28, 1))

    # Feature extractor plus hidden classifier layers, applied in list order.
    hidden_layers = [
        Conv2D(filters=32, kernel_size=(3, 3)),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.2),
        Flatten(),
        Dense(128),
        Activation('relu'),
        Dense(10),
    ]
    x = x_in
    for layer in hidden_layers:
        x = layer(x)

    # Softmax turns the 10 outputs into class probabilities.
    y = Activation('softmax')(x)

    model = Model(x_in, y)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # summary() prints the table itself and returns None, so a trailing "None"
    # is printed after the table.
    print(model.summary())
    

    运行输出

    Model: "model"
    _________________________________________________________________
    Layer (type)                 Output Shape              Param #   
    =================================================================
    input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
    _________________________________________________________________
    conv2d (Conv2D)              (None, 26, 26, 32)        320       
    _________________________________________________________________
    max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
    _________________________________________________________________
    dropout (Dropout)            (None, 13, 13, 32)        0         
    _________________________________________________________________
    flatten (Flatten)            (None, 5408)              0         
    _________________________________________________________________
    dense (Dense)                (None, 128)               692352    
    _________________________________________________________________
    activation (Activation)      (None, 128)               0         
    _________________________________________________________________
    dense_1 (Dense)              (None, 10)                1290      
    _________________________________________________________________
    activation_1 (Activation)    (None, 10)                0         
    =================================================================
    Total params: 693,962
    Trainable params: 693,962
    Non-trainable params: 0
    _________________________________________________________________
    None
    

    5. 模型训练和保存

    # Train on the training split, validating on the held-out split after each epoch.
    model.fit(
        x=X_train,
        y=Y_train,
        batch_size=N_BATCH,
        epochs=N_EPOCH,
        verbose=1,
        validation_data=(X_test, Y_test),
    )

    # evaluate() returns [loss, accuracy] for the compiled loss and metric.
    score = model.evaluate(X_test, Y_test, verbose=0)
    for label, value in zip(('Test score:', 'Test accuracy:'), score):
        print(label, value)

    # Persist the trained model to the .h5 path.
    save_model(model, S_KERAS_MODEL_PATH)
    

    运行输出

    486/500 [============================>.] - ETA: 0s - loss: 0.2873 - accuracy: 0.9144
    500/500 [==============================] - 4s 3ms/step - loss: 0.2837 - accuracy: 0.9155 - val_loss: 0.1352 - val_accuracy: 0.9616
    Test score: 0.13516278564929962
    Test accuracy: 0.9616000056266785
    

    6. 模型加载及使用加载后的模型

    # Restore the model saved in the previous step. The result is bound to its
    # own name: assigning it to `load_model` would overwrite the imported
    # keras `load_model` function and make any later call to it fail.
    loaded_model = load_model(S_KERAS_MODEL_PATH)
    print("load model ok")

    # Re-evaluate on the test split; the numbers should match those obtained
    # before saving.
    score = loaded_model.evaluate(X_test, Y_test, verbose=0)
    print('load model Test score:', score[0])
    print('load model Test accuracy:', score[1])
    

    运行输出

    load model ok
    load model Test score: 0.13516278564929962
    load model Test accuracy: 0.9616000056266785
    

    7. 导出ONNX

    # Convert the saved Keras model to ONNX with the tf2onnx command-line tool.
    # The call is built as an argument list and run without a shell, so model
    # paths containing spaces or shell metacharacters are passed through intact.
    # sys.executable is the interpreter running this script, which keeps the
    # conversion in the same Python environment.
    ls_cmd = [
        sys.executable, '-m', 'tf2onnx.convert',
        '--keras', S_KERAS_MODEL_PATH,
        '--output', S_ONNX_MODEL_PATH,
    ]
    s_cmd = ' '.join(ls_cmd)
    print(s_cmd)

    # check=True raises CalledProcessError on a non-zero exit status, instead
    # of letting the later ONNX loading step fail on a missing file.
    proc = subprocess.run(ls_cmd, check=True)
    print(proc.returncode)
    

    运行输出

    python -m tf2onnx.convert --keras G:\Data\task_model_out\_tmp_out\cnn_keras.h5 --output G:\Data\task_model_out\_tmp_out\cnn_keras.onnx
    0
    

    8. 加载ONNX并运行

    # Load the exported graph under its own name so the trained Keras `model`
    # is not overwritten.
    onnx_model = onnx.load(S_ONNX_MODEL_PATH)
    # Well-formedness check; it returns None on success, hence the printed "None".
    print(onnx.checker.check_model(onnx_model))
    # Human-readable dump of the graph.
    print(onnx.helper.printable_graph(onnx_model.graph))

    # Collect the graph's input/output tensor names (loop variable chosen so
    # the builtin `input` is not shadowed).
    ls_input_name = [value_info.name for value_info in onnx_model.graph.input]
    ls_output_name = [value_info.name for value_info in onnx_model.graph.output]
    print("input name ", ls_input_name)
    print("output name ", ls_output_name)
    s_input_name = ls_input_name[0]

    # Use the first two batches of training images as the inference input and
    # wrap them in an OrtValue placed on the configured device.
    x_input = X_train[:N_BATCH * 2].astype(np.float32)
    ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, N_DEVICE_ID)
    print("val device ", ort_val.device_name())
    print("val shape ", ort_val.shape())
    print("val data type ", ort_val.data_type())
    print("is_tensor ", ort_val.is_tensor())
    print("array_equal ", np.array_equal(ort_val.numpy(), x_input))

    # Pick the execution provider that matches the configured device.
    providers = 'CUDAExecutionProvider' if S_DEVICE == "cuda" else 'CPUExecutionProvider'
    print("providers ", providers)

    # The provider list is given once, at session creation; a following
    # set_providers() call with the same list would be redundant.
    ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH, providers=[providers])
    # Passing None as the output list requests every graph output.
    outputs = ort_session.run(None, {s_input_name: ort_val})
    print("sess env ", ort_session.get_providers())
    print(type(outputs))
    print(outputs[0])
    

    运行输出

    None
    graph tf2onnx (
      %input_1:0[FLOAT, unk__17x28x28x1]
    ) initializers (
      %new_shape__15[INT64, 4]
      %model/dense_1/MatMul/ReadVariableOp:0[FLOAT, 128x10]
      %model/dense_1/BiasAdd/ReadVariableOp:0[FLOAT, 10]
      %model/dense/MatMul/ReadVariableOp:0[FLOAT, 5408x128]
      %model/dense/BiasAdd/ReadVariableOp:0[FLOAT, 128]
      %model/conv2d/Conv2D/ReadVariableOp:0[FLOAT, 32x1x3x3]
      %model/conv2d/BiasAdd/ReadVariableOp:0[FLOAT, 32]
      %const_fold_opt__16[INT64, 2]
    ) {
      %model/conv2d/BiasAdd__6:0 = Reshape(%input_1:0, %new_shape__15)
      %model/conv2d/BiasAdd:0 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], strides = [1, 1]](%model/conv2d/BiasAdd__6:0, %model/conv2d/Conv2D/ReadVariableOp:0, %model/conv2d/BiasAdd/ReadVariableOp:0)
      %model/max_pooling2d/MaxPool:0 = MaxPool[kernel_shape = [2, 2], strides = [2, 2]](%model/conv2d/BiasAdd:0)
      %model/max_pooling2d/MaxPool__12:0 = Transpose[perm = [0, 2, 3, 1]](%model/max_pooling2d/MaxPool:0)
      %model/flatten/Reshape:0 = Reshape(%model/max_pooling2d/MaxPool__12:0, %const_fold_opt__16)
      %model/dense/MatMul:0 = MatMul(%model/flatten/Reshape:0, %model/dense/MatMul/ReadVariableOp:0)
      %model/dense/BiasAdd:0 = Add(%model/dense/MatMul:0, %model/dense/BiasAdd/ReadVariableOp:0)
      %model/activation/Relu:0 = Relu(%model/dense/BiasAdd:0)
      %model/dense_1/MatMul:0 = MatMul(%model/activation/Relu:0, %model/dense_1/MatMul/ReadVariableOp:0)
      %model/dense_1/BiasAdd:0 = Add(%model/dense_1/MatMul:0, %model/dense_1/BiasAdd/ReadVariableOp:0)
      %Identity:0 = Softmax[axis = 1](%model/dense_1/BiasAdd:0)
      return %Identity:0
    }
    input name  ['input_1:0']
    output name  ['Identity:0']
    val device  cuda
    val shape  [128, 28, 28, 1]
    val data type  tensor(float)
    is_tensor  True
    array_equal  True
    providers  CUDAExecutionProvider
    sess env  ['CUDAExecutionProvider', 'CPUExecutionProvider']
    <class 'list'>
    [[1.0287621e-04 9.9524093e-01 5.0408958e-04 ... 6.5664819e-05
      3.8182980e-03 1.2303158e-05]
     [9.9932754e-01 2.7173186e-08 3.5315077e-04 ... 3.0959238e-06
      8.5986117e-05 3.6047477e-06]
     [1.1101285e-05 9.9719965e-01 3.8205151e-04 ... 1.2267688e-03
      7.8595197e-04 4.0839368e-05]
     ...
     [2.8337089e-02 1.5399084e-05 2.1733245e-01 ... 1.5945830e-05
      2.1134425e-02 1.7111158e-03]
     [1.7888090e-06 3.3868539e-06 5.2631256e-04 ... 9.9888057e-01
      5.4794059e-06 5.5255485e-04]
     [4.1398227e-05 1.0462944e-06 5.5901739e-03 ... 3.1221823e-09
      6.6847453e-04 7.8918066e-07]]
    

    你甚至不愿意Star的GitHub

    ai_fast_handbook

  • 相关阅读:
    Java 复习笔记 - 常用API 中
    SpringMVC学习笔记
    MySQL主从复制和读写分离
    记 350亿数据从 es 迁移到 ClickHouse 遇到的问题
    【Java 基础篇】优雅处理文本数据:Java字符流详解
    About Significance Tests
    【无标题】
    Docker Compose之容器编排开发初探
    Python数据结构——基础数据结构
    bug:XShell无法连接CentOS虚拟机
  • 原文地址:https://www.cnblogs.com/Kalafinaian/p/16029085.html