benchmark:一些常见模型的模型文件,如mobilenet,resnet,vgg等。
cmake:有关链接openmp和vulkan的cmake文件,这两个都是用于并行计算加速的
docs:文档,包括开发指南,构建指南等等
examples:使用ncnn的示例,包括几个常用模型的cpp调用示例,及其cmakelist文件
images:此目录无关紧要,是页面上的图片
src:ncnn源码目录
toolchains:不同平台的cmake编译文件,包括aarch64、arm-linux、ios、jetson、pi3等
tools:包含将其他深度学习框架的模型转换为ncnn模型的工具,以及量化工具等
build.sh:构建脚本
CMakeLists.txt:总的cmake文件
这里我们主要阅读src中的源码
namespace ncnn {
class Extractor;
class Net
{
public:
// empty init
//空构造函数
Net();
// clear and destroy
//析构函数
~Net();
public:
// option can be changed before loading
//Option对象是用于load参数之前传入基本设置,比如线程数等
Option opt;
/*这一块用于用于注册用户自定义的layer,可以先略过不看,主要是两个注册自定义layer的函数,一个是传入命名自定义,一个是传入索引自定义
#if NCNN_STRING
// register custom layer by layer type name
// return 0 if success
int register_custom_layer(const char* type, layer_creator_func creator);
#endif // NCNN_STRING
// register custom layer by layer type
// return 0 if success
int register_custom_layer(int index, layer_creator_func creator);
*/
//接下来是多态的两个函数load_param和load_model
#if NCNN_STDIO
#if NCNN_STRING
// load network structure from plain param file
// return 0 if success
//通过文本文件加载网络结构
int load_param(FILE* fp);
//通过路径加载网络结构
int load_param(const char* protopath);
//通过内存加载网络结构
int load_param_mem(const char* mem);
#endif // NCNN_STRING
// load network structure from binary param file
// return 0 if success
//通过二进制文件加载网络结构
int load_param_bin(FILE* fp);
int load_param_bin(const char* protopath);
// load network weight data from model file
// return 0 if success
//通过model文件加载网络权重
int load_model(FILE* fp);
int load_model(const char* modelpath);
#endif // NCNN_STDIO
// load network structure from external memory
// memory pointer must be 32-bit aligned
// return bytes consumed
//通过外置内存加载网络结构
int load_param(const unsigned char* mem);
// reference network weight data from external memory
// weight data is not copied but referenced
// so external memory should be retained when used
// memory pointer must be 32-bit aligned
// return bytes consumed
//通过外置内存加载网络权重
int load_model(const unsigned char* mem);
// unload network structure and weight data
//清除网络结构和网络权重
void clear();
// construct an Extractor from network
//在Net内构建一个Extractor对象
Extractor create_extractor() const;
protected:
// parse the structure of network
// fuse int8 op dequantize and quantize by requantize
//重置网络,用于重用网络
int fuse_network();
//友元类,主要作用让Extractor对象可以访问Net对象中的私有和保护的属性和函数
friend class Extractor;
#if NCNN_STRING
//通过name查找对应的blob索引
int find_blob_index_by_name(const char* name) const;
//通过name查找对应的layer索引
int find_layer_index_by_name(const char* name) const;
//通过layer类型查找索引
int custom_layer_to_index(const char* type);
//根据类型创建layer
Layer* create_custom_layer(const char* type);
#endif // NCNN_STRING
//根据索引创建layer
Layer* create_custom_layer(int index);
//前向推理层
int forward_layer(int layer_index, std::vector& blob_mats, Option& opt) const;
protected:
//用于存储网络的blob的vector
std::vector blobs;
//用于存储网络的layer的vector
std::vector layers;
用于存储注册的layer
std::vector custom_layer_registry;
};
// Parse a network description from dr and build the layer/blob graph.
// Values are read in this order: a magic number, layer_count and blob_count,
// then one record per layer (type, name, bottom count, top count, bottom blob
// names, top blob names, layer-specific params).
// Returns -1 when a value cannot be scanned, the magic number does not match,
// a count is not positive, or a layer type is neither built in nor registered.
// Returns 0 after all layer records have been consumed; note that a layer whose
// params fail to load is only logged and skipped (continue), which leaves its
// slot in d->layers unassigned by this function.
// NOTE(review): the "..." line below marks code omitted from this excerpt.
int Net::load_param(const DataReader& dr)
{
// Scan one value through dr.scan; on failure log and return -1 from load_param.
#define SCAN_VALUE(fmt, v) \
if (dr.scan(fmt, &v) != 1) \
{ \
NCNN_LOGE("parse " #v " failed"); \
return -1; \
}
int magic = 0;
SCAN_VALUE("%d", magic) // first value: the magic number
if (magic != 7767517) // 7767517 is the expected magic number; anything else is reported as an outdated param file
{
NCNN_LOGE("param is too old, please regenerate");
return -1;
}
// parse
int layer_count = 0;
int blob_count = 0;
SCAN_VALUE("%d", layer_count) // next: number of layers, then number of blobs
SCAN_VALUE("%d", blob_count)
if (layer_count <= 0 || blob_count <= 0)
{
NCNN_LOGE("invalid layer_count or blob_count");
return -1;
}
d->layers.resize((size_t)layer_count); // counts are valid: size the layer and blob vectors accordingly
d->blobs.resize((size_t)blob_count);
...
ParamDict pd; // receives the layer-specific params of each layer
int blob_index = 0; // next unused slot in d->blobs
// The remainder of the input describes the network, one record per layer.
// Each record has 7 kinds of fields:
/*
layer type
layer name
number of input blobs (bottom blob)
number of output blobs (top blob)
input blob names (one or more)
output blob names (one or more)
layer-specific params (zero or more): either k=v, or the array form
k=len,v1,v2,v3... These are stored in a ParamDict; their meaning depends
on the layer type.
*/
// Per-layer-type param reference: https://github.com/Tencent/ncnn/wiki/operation-param-weight-table
for (int i = 0; i < layer_count; i++)
{
char layer_type[256]; // layer type name
char layer_name[256]; // layer name
int bottom_count = 0; // number of input blobs (bottom blob)
int top_count = 0; // number of output blobs (top blob)
SCAN_VALUE("%255s", layer_type)
SCAN_VALUE("%255s", layer_name)
SCAN_VALUE("%d", bottom_count)
SCAN_VALUE("%d", top_count)
Layer* layer = create_layer(layer_type);
if (!layer) // not created by create_layer: try the custom layers
{
layer = create_custom_layer(layer_type);
}
if (!layer) // not available as a custom layer either
{
NCNN_LOGE("layer %s not exists or registered", layer_type);
clear();
return -1;
}
#if NCNN_VULKAN
if (opt.use_vulkan_compute)
layer->vkdev = d->vkdev;
#endif // NCNN_VULKAN
layer->type = std::string(layer_type); // store the scanned type and name on the new layer
layer->name = std::string(layer_name);
// NCNN_LOGE("new layer %d %s", i, layer_name);
layer->bottoms.resize(bottom_count); // one index slot per input blob
for (int j = 0; j < bottom_count; j++) // resolve the layer's inputs
{
char bottom_name[256]; // name of this input blob
SCAN_VALUE("%255s", bottom_name) // read the input blob name
int bottom_blob_index = find_blob_index_by_name(bottom_name); // look the name up among the blobs
if (bottom_blob_index == -1) // unknown name: claim the next free blob slot for it
{
Blob& blob = d->blobs[blob_index]; // the slot at blob_index becomes this blob
bottom_blob_index = blob_index;
blob.name = std::string(bottom_name);
// NCNN_LOGE("new blob %s", bottom_name);
blob_index++;
}
Blob& blob = d->blobs[bottom_blob_index]; // the blob feeding input j
blob.consumer = i;
layer->bottoms[j] = bottom_blob_index;
}
// Outputs are set up much like inputs, except every top claims a new blob slot.
layer->tops.resize(top_count); // one index slot per output blob
for (int j = 0; j < top_count; j++)
{
Blob& blob = d->blobs[blob_index];
char blob_name[256];
SCAN_VALUE("%255s", blob_name)
blob.name = std::string(blob_name);
// NCNN_LOGE("new blob %s", blob_name);
blob.producer = i; // layer i produces this blob
layer->tops[j] = blob_index; // output j of layer i is this blob
blob_index++;
}
// layer specific params
// pd receives the trailing params of this layer record:
// either k=v, or the array form k=len,v1,v2,v3...
// Their meaning depends on the layer type.
int pdlr = pd.load_param(dr);
if (pdlr != 0)
{
NCNN_LOGE("ParamDict load_param %d %s failed", i, layer->name.c_str());
continue;
}
if (layer->support_int8_storage)
{
// no int8 gpu support yet
opt.use_vulkan_compute = false;
}
// pull out top shape hints
Mat shape_hints = pd.get(30, Mat());
if (!shape_hints.empty())
{
const int* psh = shape_hints;
// Each top uses 4 ints of the hint array: dims followed by up to 3 extents.
for (int j = 0; j < top_count; j++)
{
Blob& blob = d->blobs[layer->tops[j]];
int dims = psh[0];
if (dims == 1)
{
blob.shape = Mat(psh[1], (void*)0, 4u, 1);
}
if (dims == 2)
{
blob.shape = Mat(psh[1], psh[2], (void*)0, 4u, 1);
}
if (dims == 3)
{
blob.shape = Mat(psh[1], psh[2], psh[3], (void*)0, 4u, 1);
}
psh += 4;
}
}
// set bottom and top shape hints
layer->bottom_shapes.resize(bottom_count);
for (int j = 0; j < bottom_count; j++)
{
layer->bottom_shapes[j] = d->blobs[layer->bottoms[j]].shape;
}
layer->top_shapes.resize(top_count);
for (int j = 0; j < top_count; j++)
{
layer->top_shapes[j] = d->blobs[layer->tops[j]].shape;
}
int lr = layer->load_param(pd); // hand the parsed params to the concrete layer
if (lr != 0)
{
NCNN_LOGE("layer load_param %d %s failed", i, layer->name.c_str());
continue;
}
d->layers[i] = layer; // store the fully initialised layer in the net's layer vector
// At this point the layer-specific params have been passed to the layer
// object that was created for this layer type.
}
d->update_input_output_indexes();
d->update_input_output_names();
#undef SCAN_VALUE
return 0;
}
把对应的特殊参数传递给根据不同的层类型创建的layer对象。这里说的不同的层类型目前主要有以下这么多(编译ncnn过程中生成的layer_type_enum.h):
AbsVal = 0, ArgMax = 1, BatchNorm = 2, Bias = 3, BNLL = 4, Concat = 5, Convolution = 6, Crop = 7, Deconvolution = 8, Dropout = 9, Eltwise =
10, ELU = 11, Embed = 12, Exp = 13, Flatten = 14, InnerProduct = 15,
Input = 16, Log = 17, LRN = 18, MemoryData = 19, MVN = 20, Pooling = 21, Power = 22, PReLU = 23, Proposal = 24, Reduction = 25, ReLU = 26, Reshape = 27, ROIPooling = 28, Scale = 29, Sigmoid = 30, Slice = 31, Softmax = 32, Split = 33, SPP = 34, TanH = 35, Threshold = 36, Tile = 37, RNN = 38, LSTM = 39, BinaryOp = 40, UnaryOp = 41, ConvolutionDepthWise = 42, Padding = 43, Squeeze = 44, ExpandDims = 45, Normalize = 46, Permute = 47, PriorBox = 48, DetectionOutput = 49, Interp = 50, DeconvolutionDepthWise = 51, ShuffleChannel = 52,
InstanceNorm = 53, Clip = 54, Reorg = 55, YoloDetectionOutput = 56,
Quantize = 57, Dequantize = 58, Yolov3DetectionOutput = 59,
PSROIPooling = 60, ROIAlign = 61, Packing = 62, Requantize = 63, Cast = 64, HardSigmoid = 65, SELU = 66,
class Convolution : public Layer
{
public:
Convolution();
virtual int load_param(const ParamDict& pd);
#include "layer_registry.h"
// Number of entries in layer_registry, computed from the array size.
static const int layer_registry_entry_count = sizeof(layer_registry) / sizeof(layer_registry_entry);
#if NCNN_STRING
// Translate a layer type name into its position in layer_registry.
// Returns the index of the first entry whose name equals type, or -1 when
// no entry matches.
int layer_to_index(const char* type)
{
    int idx = 0;
    while (idx < layer_registry_entry_count)
    {
        // strcmp yields 0 when both names are identical
        if (!strcmp(type, layer_registry[idx].name))
            return idx;
        ++idx;
    }
    return -1;
}
// Create a layer from its type name.
// Resolves the name through layer_to_index and forwards to the index-based
// create_layer overload; returns 0 when the name is unknown.
Layer* create_layer(const char* type)
{
    const int resolved = layer_to_index(type);
    if (resolved != -1)
        return create_layer(resolved);
    return 0;
}
#endif // NCNN_STRING
// Create a layer from its index in layer_registry and record that index in
// the layer's typeindex. Returns 0 when no creator is available.
// NOTE(review): this is an abridged excerpt — the declaration of layer_creator
// and the #if that pairs with the #endif below are not shown here.
Layer* create_layer(int index)
{
#endif // NCNN_RUNTIME_CPU && NCNN_RVV
{
layer_creator = layer_registry[index].creator; // pick the creator function registered for this index
}
// Original author's note, using convolution as the example: in layer_registry.h
// the creator registered for convolution is Convolution_final_layer_creator.
// Invoking the creator is a call through the layer_creator_func function
// pointer, i.e. a call to Convolution_final_layer_creator().
// *INDENT-ON*
// clang-format on
if (!layer_creator)
return 0;
Layer* layer = layer_creator(0);
layer->typeindex = index;
return layer;
}
最终还是调用了int Net::load_model(FILE* fp)函数
在modelbin.cpp中load的实现就很简单了:根据load的不同重载传入不同的参数,声明一维、二维或三维的Mat,然后把从xxx.bin中解析出的数据放入Mat并返回即可。
layer->load_model(mb),我们指定layer其实是解析完模型网络结构的具体的layer,比如卷积Convolution,也就是这个调用其实调用的是layer的子类Convolution层的load_model()函数。所以来看下Convolution层的load_model()函数(在src/layer/convolution.cpp里)
// Load the weights of every layer from dr, then create each layer's pipeline.
// Requires the layer list to be populated already; otherwise logs and
// returns -1. Failures inside either loop set ret to -1 and stop that loop.
// NOTE(review): this excerpt ends inside the function — the closing brace and
// the final return are not shown.
int Net::load_model(const DataReader& dr)
{
if (d->layers.empty())
{
NCNN_LOGE("network graph not ready");
return -1;
}
int layer_count = (int)d->layers.size();
// load file
int ret = 0;
ModelBinFromDataReader mb(dr); // weight reader backed by dr (i.e. the model .bin data)
for (int i = 0; i < layer_count; i++)
{
Layer* layer = d->layers[i];
//Here we found inconsistent content in the parameter file.
// A null layer means it was never constructed, or does not match the network structure file.
if (!layer)
{
NCNN_LOGE("load_model error at layer %d, parameter file has inconsistent content.", i);
ret = -1;
break;
}
int lret = layer->load_model(mb); // let the layer read its own weights
if (lret != 0)
{
#if NCNN_STRING
NCNN_LOGE("layer load_model %d %s failed", i, layer->name.c_str());
#else
NCNN_LOGE("layer load_model %d failed", i);
#endif
ret = -1;
break;
}
if (layer->support_int8_storage)
{
// no int8 gpu support yet
opt.use_vulkan_compute = false;
}
}
#if NCNN_VULKAN
if (opt.use_vulkan_compute)
{
// No pipeline cache supplied through opt: create one on first use and use it.
if (!opt.pipeline_cache)
{
if (!d->pipeline_cache)
d->pipeline_cache = new PipelineCache(d->vkdev);
opt.pipeline_cache = d->pipeline_cache;
}
}
#endif // NCNN_VULKAN
for (int i = 0; i < layer_count; i++)
{
Layer* layer = d->layers[i];
// Create the layer's pipeline from the configured options.
// opt1 is a per-layer copy of opt so that adjustments made for one layer
// (image storage below) do not affect the others.
Option opt1 = opt;
#if NCNN_VULKAN
if (opt.use_vulkan_compute)
{
if (!layer->support_image_storage) opt1.use_image_storage = false;
}
#endif // NCNN_VULKAN
int cret = layer->create_pipeline(opt1); // 0 means the pipeline was created successfully
if (cret != 0)
{
#if NCNN_STRING
NCNN_LOGE("layer create_pipeline %d %s failed", i, layer->name.c_str());
#else
NCNN_LOGE("layer create_pipeline %d failed", i);
#endif
ret = -1;
break;
}
}
// Run whatever is needed to obtain the blob at blob_index.
// Returns -1 immediately when blob_index is out of range. When the cached Mat
// for the blob has dims == 0, the producing layer is run, either through the
// Vulkan path or through forward_layer.
// NOTE(review): this excerpt ends inside the function — the part that fills
// feat, uses type, restores the saved settings and returns is not shown.
int Extractor::extract(int blob_index, Mat& feat, int type)
{
// Reject indices outside [0, blob_mats.size()).
if (blob_index < 0 || blob_index >= (int)d->blob_mats.size())
return -1;
// Remember the current settings while applying the ones from the options.
int old_blocktime = get_kmp_blocktime();
set_kmp_blocktime(d->opt.openmp_blocktime);
int old_flush_denormals = get_flush_denormals();
set_flush_denormals(d->opt.flush_denormals);
int ret = 0;
// NOTE(review): dims == 0 presumably means the blob has not been computed yet — confirm.
if (d->blob_mats[blob_index].dims == 0)
{
int layer_index = d->net->blobs()[blob_index].producer; // the layer that produces this blob
// use local allocator
if (d->opt.use_local_pool_allocator)
{
if (!d->opt.blob_allocator)
{
d->opt.blob_allocator = d->net->d->local_blob_allocator;
}
if (!d->opt.workspace_allocator)
{
d->opt.workspace_allocator = d->net->d->local_workspace_allocator;
}
}
#if NCNN_VULKAN
if (d->opt.use_vulkan_compute)
{
// use local allocator
if (!d->opt.blob_vkallocator)
{
d->local_blob_vkallocator = d->net->vulkan_device()->acquire_blob_allocator();
d->opt.blob_vkallocator = d->local_blob_vkallocator;
}
if (!d->opt.workspace_vkallocator)
{
d->opt.workspace_vkallocator = d->opt.blob_vkallocator;
}
if (!d->opt.staging_vkallocator)
{
d->local_staging_vkallocator = d->net->vulkan_device()->acquire_staging_allocator();
d->opt.staging_vkallocator = d->local_staging_vkallocator;
}
ncnn::VkCompute cmd(d->net->vulkan_device());
#if NCNN_BENCHMARK
// Two query slots per layer (read back below as start/end pairs).
cmd.create_query_pool(d->net->layers().size() * 2);
#endif // NCNN_BENCHMARK
// TODO vkimagemat for adreno
if (d->opt.use_image_storage)
{
VkImageMat feat_gpu;
ret = extract(blob_index, feat_gpu, cmd);
// Download only if the CPU-side Mat is still empty and the GPU produced data.
if (d->blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
{
cmd.record_download(feat_gpu, d->blob_mats[blob_index], d->opt);
cmd.submit_and_wait();
#if NCNN_BENCHMARK
// NOTE(review): the vector's element type is missing in this excerpt;
// presumably std::vector<uint64_t>, given the uint64_t reads below — confirm.
std::vector results(d->net->layers().size() * 2);
cmd.get_query_pool_results(0, d->net->layers().size() * 2, results);
for (size_t i = 0; i < d->net->layers().size(); i++)
{
uint64_t start = results[i * 2];
uint64_t end = results[i * 2 + 1];
// Skip layers for which either value is zero.
if (start == 0 || end == 0)
continue;
double duration_us = (end - start) * d->net->vulkan_device()->info.timestamp_period() / 1000;
NCNN_LOGE("%-24s %-30s %8.2lfus |", d->net->layers()[i]->type.c_str(), d->net->layers()[i]->name.c_str(), duration_us);
}
#endif // NCNN_BENCHMARK
}
}
else
{
VkMat feat_gpu;
ret = extract(blob_index, feat_gpu, cmd);
// Download only if the CPU-side Mat is still empty and the GPU produced data.
if (d->blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
{
cmd.record_download(feat_gpu, d->blob_mats[blob_index], d->opt);
cmd.submit_and_wait();
#if NCNN_BENCHMARK
// NOTE(review): the vector's element type is missing in this excerpt;
// presumably std::vector<uint64_t>, given the uint64_t reads below — confirm.
std::vector results(d->net->layers().size() * 2);
cmd.get_query_pool_results(0, d->net->layers().size() * 2, results);
for (size_t i = 0; i < d->net->layers().size(); i++)
{
uint64_t start = results[i * 2];
uint64_t end = results[i * 2 + 1];
// Skip layers for which either value is zero.
if (start == 0 || end == 0)
continue;
double duration_us = (end - start) * d->net->vulkan_device()->info.timestamp_period() / 1000;
NCNN_LOGE("%-24s %-30s %8.2lfus |", d->net->layers()[i]->type.c_str(), d->net->layers()[i]->name.c_str(), duration_us);
}
#endif // NCNN_BENCHMARK
}
}
}
else
{
ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->opt);
}
#else
ret = d->net->d->forward_layer(layer_index, d->blob_mats, d->opt); // start the forward computation
// NOTE(review): per the original author's note, forward_layer keeps running
// layers until the requested output is produced; its body is not shown here.
#endif // NCNN_VULKAN
}
// Abstract allocator interface: subclasses supply the allocation and release
// routines.
class Allocator
{
public:
// Virtual destructor of the Allocator base class.
virtual ~Allocator();
// Allocate memory; pure virtual, to be implemented by subclasses.
// size: the size to allocate.
virtual void* fastMalloc(size_t size) = 0;
// Release memory; pure virtual, to be implemented by subclasses.
// ptr: pointer to the memory to release.
virtual void fastFree(void* ptr) = 0;
};
// Round a pointer up to the next multiple of n bytes.
// ptr  Pointer to align; returned unchanged when it is already aligned.
// n    Alignment in bytes; must be a power of two. Defaults to sizeof(_Tp).
// Returns the smallest address >= ptr that is a multiple of n, typed as _Tp*.
template<typename _Tp>
static inline _Tp* alignPtr(_Tp* ptr, int n = (int)sizeof(_Tp))
{
    // For a power-of-two n, -n converted to size_t has all bits set except
    // the low log2(n) ones, so the AND clears the remainder after rounding up.
    const size_t address = reinterpret_cast<size_t>(ptr);
    const size_t rounded = (address + n - 1) & -n;
    return reinterpret_cast<_Tp*>(rounded);
}