• tvm交叉编译android opencl


    模型编译:

    #encoding:utf-8

    import onnx

    import numpy as np

    import tvm

    import tvm.relay as relay

    import os

    from tvm.contrib import ndk

    # Load the ONNX model that is going to be compiled.
    model_proto = onnx.load('mobilenet_v3_small.onnx')

    # Shape dictionary handed to the Relay ONNX importer: input name -> shape.
    # NOTE(review): 'input1' has to match the input name stored in the ONNX
    # file, and the deployment code must use the same name in set_input.
    input_name = 'input1'
    input_shape = (1, 3, 224, 224)
    shape_dict = {input_name: input_shape}

    # Import the ONNX graph into Relay; returns the module and its parameters.
    sym, params = relay.frontend.from_onnx(model_proto, shape_dict)

    # Device code is generated for OpenCL (adreno device tag); the host-side
    # code is generated by LLVM for the arm64-linux-android triple.
    target = "opencl -device=adreno"
    target_host = "llvm -mtriple=arm64-linux-android"

    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(sym, target=target, target_host=target_host, params=params)

    # Link the shared library with the NDK clang++ so it can be loaded on the
    # device. Replace /path_to_ndk with the local NDK installation directory.
    ndk_compiler = "/path_to_ndk/26.0.10792818/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android34-clang++"
    lib.export_library("deploy.so", cc=ndk_compiler)

    # Graph description is written as text, parameters as a binary blob.
    with open("deploy.json", 'w') as graph_file:
        graph_file.write(graph)

    with open("deploy.params", 'wb') as params_file:
        params_file.write(relay.save_param_dict(params))

    --------------------------------------------------------------------------------------------------------------------------

    c++代码:

    #include <dlpack/dlpack.h>
    #include <tvm/runtime/c_runtime_api.h>
    #include <tvm/runtime/module.h>
    #include <tvm/runtime/packed_func.h>
    #include <tvm/runtime/registry.h>

    #include <algorithm>
    #include <chrono>
    #include <cstddef>
    #include <cstdint>
    #include <fstream>
    #include <iostream>
    #include <iterator>
    #include <memory>
    #include <string>
    #include <vector>

    int main()

    {

        // tvm module for compiled functions

        tvm::runtime::Module mod_syslib = tvm::runtime::Module::LoadFromFile("deploy.so");

        // json graph

        std::ifstream json_in("deploy.json", std::ios::in);

        std::string json_data((std::istreambuf_iterator(json_in)), std::istreambuf_iterator());

        json_in.close();

        // parameters in binary

        std::ifstream params_in("deploy.params", std::ios::binary);

        std::string params_data((std::istreambuf_iterator(params_in)), std::istreambuf_iterator());

        params_in.close();

        // parameters need to be TVMByteArray type to indicate the binary data

        TVMByteArray params_arr;

        params_arr.data = params_data.c_str();

        params_arr.size = params_data.length();

        int dtype_code = kDLFloat;

        int dtype_bits = 32;

        int dtype_lanes = 1;

        int device_type = kDLOpenCL;

        int device_id = 0;

        // get global function module for graph runtime

        tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_executor.create"))(json_data, mod_syslib, device_type, device_id);

        DLTensor* x;

        int in_ndim = 4;

        int64_t in_shape[4] = {1, 3, 224, 224};

        TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x);

        // load image data saved in binary

        std::ifstream data_fin("cat.bin", std::ios::binary);

        if(data_fin.is_open()){

            ;

        }else{

            std::cout << "@@@@@@@@ Failed to open cat.bin!!!" << std::endl;

        }

        float img[3*224*224] = {0.0f};

        // data_fin.read(static_cast(x->data), 3 * 224 * 224 * 4);

        data_fin.read((char*)img, 3 * 224 * 224 * sizeof(float));


     

        TVMArrayCopyFromBytes(x, img, 3 * 224 * 224 * sizeof(float));


     

        // get the function from the module(set input data)

        tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input");

        set_input("input", x);

        // get the function from the module(load patameters)

        tvm::runtime::PackedFunc load_params = mod.GetFunction("load_params");

        load_params(params_arr);

        // get the function from the module(run it)

        tvm::runtime::PackedFunc run = mod.GetFunction("run");

        run();

        std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();

        const uint_t loops = 10000;

        for(uint_t i = 0; i < loops; i++){

            run();

        }

        std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now();

        auto elapsed = end_time - start_time;

        long long elapsed_time = std::chrono::duration_cast(elapsed).count();

        std::cout << "elapsed time in ount: " << elapsed_time << std::endl;

        std::cout << "inference time per image: " << double(elapsed_time) / double(loops) << std::endl;


     

        DLTensor* y;

        int out_ndim = 2;

        int64_t out_shape[2] = {1, 1001};

        TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y);

        // get the function from the module(get output data)

        tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output");

        get_output(0, y);

        // get the maximum position in output vector

        // auto y_iter = static_cast(y->data);

        float y_iter[1001] = {0};

        TVMArrayCopyToBytes(y, y_iter, 1001*sizeof(float));

        auto max_iter = std::max_element(y_iter, y_iter + 1001);

        auto max_index = std::distance(y_iter, max_iter);

        std::cout << "The maximum position in output vector is: " << max_index << std::endl;

        TVMArrayFree(x);

        TVMArrayFree(y);

        return 0;

    }

    其他的请参考:tvm交叉编译示例——android cpu-CSDN博客

  • 相关阅读:
    Linux多线程服务端编程:使用muduo C++网络库 学习笔记 第四章 C++多线程系统编程精要
    学到一招 chrome 浏览器 debug 悬浮样式
    【性能测试】初识 Jmeter 中的 BeanShell
    Spring AOP
    好用的word插件汇总
    Elasticsearch 聚合字段aggregate-metric-double
    盘点 JavaScript 中类的继承
    在win系统安装部署svn服务及客户端使用
    用C++标准库生成制定范围内的整数随机数
    【Python基础篇016】异常处理的超详细讲解
  • 原文地址:https://blog.csdn.net/love_xunmeng/article/details/136398808