• ceres解析导数(Analytic Derivatives)进阶


    在这里插入图片描述


      ceres中有三种求导的方式,分别是自动求导、数值导数和解析导数,本节详细讲述解析导数的使用方式。

    一、问题定义

      考虑一个直线拟合问题:
    y = b 1 ( 1 + e b 2 − b 3 x ) 1 / b 4 y = \frac{b_1}{(1+e^{b_2-b_3x})^{1/b_4}} y=(1+eb2b3x)1/b4b1
    我们给定一些数据 { x i , y i } ,   ∀ i = 1 , . . . , n \{x_i, y_i\},\ \forall i=1,... ,n {xi,yi}, i=1,...,n来求解参数 b 1 , b 2 , b 3 b_1, b_2, b_3 b1,b2,b3 b 4 b_4 b4,这个问题可以等价为,需要寻找一组参数 b 1 , b 2 , b 3 b_1, b_2, b_3 b1,b2,b3 b 4 b_4 b4,使得如下方程的数值最小:
    E ( b 1 , b 2 , b 3 , b 4 ) = ∑ i f 2 ( b 1 , b 2 , b 3 , b 4 ; x i , y i ) = ∑ i ( b 1 ( 1 + e b 2 − b 3 x i ) 1 / b 4 − y i ) 2 E(b1,b2,b3,b4)=if2(b1,b2,b3,b4;xi,yi)=i(b1(1+eb2b3xi)1/b4yi)2 E(b1,b2,b3,b4)=if2(b1,b2,b3,b4;xi,yi)=i((1+eb2b3xi)1/b4b1yi)2
    为了用ceres来解决这个问题,我们需要定义一个CostFunction(损失函数)来计算给定的 x x x y y y的残差 f f f和导数。利用高等数学求导的知识,我们可以将 f f f分别 b 1 , b 2 , b 3 , b 4 b_1,b_2,b3,b_4 b1,b2,b3,b4求导,得到如下结果:
    D 1 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = 1 ( 1 + e b 2 − b 3 x ) 1 / b 4 D 2 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = − b 1 e b 2 − b 3 x b 4 ( 1 + e b 2 − b 3 x ) 1 / b 4 + 1 D 3 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 1 x e b 2 − b 3 x b 4 ( 1 + e b 2 − b 3 x ) 1 / b 4 + 1 D 4 f ( b 1 , b 2 , b 3 , b 4 ; x , y ) = b 1 log ⁡ ( 1 + e b 2 − b 3 x ) b 4 2 ( 1 + e b 2 − b 3 x ) 1 / b 4 D1f(b1,b2,b3,b4;x,y)=1(1+eb2b3x)1/b4D2f(b1,b2,b3,b4;x,y)=b1eb2b3xb4(1+eb2b3x)1/b4+1D3f(b1,b2,b3,b4;x,y)=b1xeb2b3xb4(1+eb2b3x)1/b4+1D4f(b1,b2,b3,b4;x,y)=b1log(1+eb2b3x)b24(1+eb2b3x)1/b4 D1f(b1,b2,b3,b4;x,y)D2f(b1,b2,b3,b4;x,y)D3f(b1,b2,b3,b4;x,y)D4f(b1,b2,b3,b4;x,y)=(1+eb2b3x)1/b41=b4(1+eb2b3x)1/b4+1b1eb2b3x=b4(1+eb2b3x)1/b4+1b1xeb2b3x=b42(1+eb2b3x)1/b4b1log(1+eb2b3x)

    二、代价函数定义

      有了这些导数,我们可以定义损失函数如下:

    class Rat43Analytic : public SizedCostFunction<1,4> {
       public:
         Rat43Analytic(const double x, const double y) : x_(x), y_(y) {}
         virtual ~Rat43Analytic() {}
         virtual bool Evaluate(double const* const* parameters,
                               double* residuals,
                               double** jacobians) const {
           const double b1 = parameters[0][0];
           const double b2 = parameters[0][1];
           const double b3 = parameters[0][2];
           const double b4 = parameters[0][3];
    
           residuals[0] = b1 *  pow(1 + exp(b2 -  b3 * x_), -1.0 / b4) - y_;
    
           if (!jacobians) return true;
           		double* jacobian = jacobians[0];
           if (!jacobian) return true;
    
           jacobian[0] = pow(1 + exp(b2 - b3 * x_), -1.0 / b4);
           jacobian[1] = -b1 * exp(b2 - b3 * x_) *
                         pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
           jacobian[2] = x_ * b1 * exp(b2 - b3 * x_) *
                         pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
           jacobian[3] = b1 * log(1 + exp(b2 - b3 * x_)) *
                         pow(1 + exp(b2 - b3 * x_), -1.0 / b4) / (b4 * b4);
           return true;
         }
    
        private:
         const double x_;
         const double y_;
     };
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32

      上面的代价函数的很明显太冗余了,不方便阅读和编写程序。因此,在实践中,我们将缓存一些子表达式以提高其效率,这将给我们提供如下内容:

    class Rat43AnalyticOptimized : public SizedCostFunction<1,4> {
       public:
         Rat43AnalyticOptimized(const double x, const double y) : x_(x), y_(y) {}
         virtual ~Rat43AnalyticOptimized() {}
         virtual bool Evaluate(double const* const* parameters,
                               double* residuals,
                               double** jacobians) const {
           const double b1 = parameters[0][0];
           const double b2 = parameters[0][1];
           const double b3 = parameters[0][2];
           const double b4 = parameters[0][3];
    
           const double t1 = exp(b2 -  b3 * x_);
           const double t2 = 1 + t1;
           const double t3 = pow(t2, -1.0 / b4);
           residuals[0] = b1 * t3 - y_;
    
           if (!jacobians) return true;
           double* jacobian = jacobians[0];
           if (!jacobian) return true;
    
           const double t4 = pow(t2, -1.0 / b4 - 1);
           jacobian[0] = t3;
           jacobian[1] = -b1 * t1 * t4 / b4;
           jacobian[2] = -x_ * jacobian[1];
           jacobian[3] = b1 * log(t2) * t3 / (b4 * b4);
           return true;
         }
    
       private:
         const double x_;
         const double y_;
     };
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33

    这两种表达式的时间耗时如下,来自官网
    在这里插入图片描述

    三、完整代码

    #include 
    #include 
    #include 
    
    class Rat43Analytic : public ceres::SizedCostFunction<1,4>
    {
    private:
        const double x_;
        const double y_;
    public:
        Rat43Analytic(const double x,const double y):x_(x),y_(y){}
        virtual ~Rat43Analytic(){}
        virtual bool Evaluate(double const* const* parameters,
                              double* residuals,
                              double** jacobians) const{
            const double b1=parameters[0][0];
            const double b2=parameters[0][1];
            const double b3=parameters[0][2];
            const double b4=parameters[0][3];
    
            residuals[0]=b1*pow(1+exp(b2-b3*x_),-1.0/b4)-y_;
    
            if(!jacobians) return true;
    
            double* jacobian=jacobians[0];
            if(!jacobian)  return true;
    
           jacobian[0] = pow(1 + exp(b2 - b3 * x_), -1.0 / b4);
           jacobian[1] = -b1 * exp(b2 - b3 * x_) *
                         pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
           jacobian[2] = x_ * b1 * exp(b2 - b3 * x_) *
                         pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
           jacobian[3] = b1 * log(1 + exp(b2 - b3 * x_)) *
                         pow(1 + exp(b2 - b3 * x_), -1.0 / b4) / (b4 * b4);
            return true;
        }
    };
    
    class Rat43AnalyticOptimized : public ceres::SizedCostFunction<1,4> {
       public:
         Rat43AnalyticOptimized(const double x, const double y) : x_(x), y_(y) {}
         virtual ~Rat43AnalyticOptimized() {}
         virtual bool Evaluate(double const* const* parameters,
                               double* residuals,
                               double** jacobians) const {
           const double b1 = parameters[0][0];
           const double b2 = parameters[0][1];
           const double b3 = parameters[0][2];
           const double b4 = parameters[0][3];
    
           const double t1 = exp(b2 -  b3 * x_);
           const double t2 = 1 + t1;
           const double t3 = pow(t2, -1.0 / b4);
           residuals[0] = b1 * t3 - y_;
    
           if (!jacobians) return true;
           double* jacobian = jacobians[0];
           if (!jacobian) return true;
    
           const double t4 = pow(t2, -1.0 / b4 - 1);
           jacobian[0] = t3;
           jacobian[1] = -b1 * t1 * t4 / b4;
           jacobian[2] = -x_ * jacobian[1];
           jacobian[3] = b1 * log(t2) * t3 / (b4 * b4);
           return true;
        }
    
       private:
         const double x_;
         const double y_;
     };
    
    
    int main(int argc, char const *argv[])
    {
        /*生成观测数据及初始化参数*/
        double B1=1.0,B2= 2.0,B3=2.0,B4=2.0;//true value
        double b1=2.0,b2=-1.0,b3=0.0,b4=1.5;//estimate value
        int N=200;//num of data
        double w_sigma=0.02;//sigma of noise
        /*此处仅用opencv库添加随机噪声,如果有其余添加噪声的方法,可以不用调用opencv*/
        cv::RNG rng((unsigned)time(NULL));//opencv random generator
        std::vector<double> x_data,y_data;
        for (int i = 0; i < N; i++)
        {
            double X=i;
            x_data.push_back(X);
            y_data.push_back(B1*pow(1+exp(B2-B3*X),-1/B4)+rng.gaussian(w_sigma*w_sigma));
        }
    
        /*定义需要优化的变量,并赋予初值*/
        double parameters[4]={b1,b2,b3,b4};
        const double parameters_init[4]={b1,b2,b3,b4};//赋予初值
    
    
        /*构建问题*/
        ceres::Problem problem;
        /*建立优化方程*/
        ceres::LossFunction* loss_function=new ceres::HuberLoss(1.0);
        for (int i = 0; i < N; i++)
        {
            ceres::CostFunction* cost_function=new Rat43Analytic(x_data[i],y_data[i]);
            // ceres::CostFunction* cost_function=new Rat43AnalyticOptimized(x_data[i],y_data[i]);
            problem.AddResidualBlock(cost_function,loss_function,parameters);
        }
        
        problem.SetParameterLowerBound(parameters,0,-3.5);
        problem.SetParameterUpperBound(parameters,0, 3.5);
        problem.SetParameterLowerBound(parameters,1,-3.5);
        problem.SetParameterUpperBound(parameters,1, 3.5);
        problem.SetParameterLowerBound(parameters,2,-3.5);
        problem.SetParameterUpperBound(parameters,2, 3.5);
        problem.SetParameterLowerBound(parameters,3,-3.5);
        problem.SetParameterUpperBound(parameters,3, 3.5);
    
        /*运行优化器*/
        ceres::Solver::Options options;
        options.linear_solver_type=ceres::DENSE_QR;
        options.max_num_iterations=5e2;
        options.function_tolerance=1e-10;
        options.minimizer_progress_to_stdout=true;
        ceres::Solver::Summary summary;
        ceres::Solve(options,&problem,&summary);
    
        std::cout << "parameter_true : " << B1
                                       <<" "<<B2
                                       <<" "<<B3
                                       <<" "<<B4
                                       <<" " <<std::endl;
    
        std::cout << "parameter_init : " << parameters_init[0]
                                        <<" "<<parameters_init[1]
                                        <<" "<<parameters_init[2]
                                        <<" "<<parameters_init[3]
                                        <<" " <<std::endl;
    
        std::cout << "parameter_final : " << parameters[0]
                                        <<" "<<parameters[1]
                                        <<" "<<parameters[2]
                                        <<" "<<parameters[3]
                                        <<" " <<std::endl;
    
        std::cout << "parameter_error : " << parameters[0]-B1
                                        <<" "<<parameters[1]-B2
                                        <<" "<<parameters[2]-B3
                                        <<" "<<parameters[3]-B4
                                        <<" " <<std::endl;
        
        return 0;
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150

    四、输出结果

    parameter_true : 1 2 2 2 
    parameter_init : 2 -1 0 1.5 
    parameter_final : 0.999944 2.00035 1.99574 2.00476 
    parameter_error : -5.6159e-05 0.000353811 -0.0042581 0.00476159 
    
    • 1
    • 2
    • 3
    • 4

    五、配置文件

    cmake_minimum_required(VERSION 2.8)
    project(ceresCurveFitting)
    set(CMAKE_CXX_STANDARD 14)
    find_package(OpenCV 3 REQUIRED )
    find_package(Ceres REQUIRED )
    include_directories(${OpenCV_INCLUDE_DIRS})
    include_directories( ${CERES_INCLUDE_DIRS})
    include_directories("/usr/include/eigen3")
    
    add_executable(AnalyticDerivatives AnalyticDerivatives.cpp)
    target_link_libraries(AnalyticDerivatives ${CERES_LIBRARIES} ${OpenCV_LIBS})
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    六、适用情况

    解析导数适用情况参考了官网的介绍,如下。
    在这里插入图片描述

    1. 表达式比较简单,大部分是线性的;
    2. 计算机可以对其进行微分;
    3. 需要获取比自动求导更好的性能的情况;
    4. 导数无法计算;
    5. 你喜欢使用链式求导,并且乐于手推公式。
  • 相关阅读:
    微信小程序开发---小程序的页面配置
    10.4 认识Capstone反汇编引擎
    HTML+CSS大作业:使用html设计一个简单好看的公司官网首页 浮动布局
    MySQL中json类型,你使用过吗
    【skynet】skynet消息处理与协程
    WebGL:基础练习 / 简单学习 / demo / canvas3D
    智能仓储立库核心干货|立体化立体仓库类型是如何进行叉车和堆垛机配置?
    java计算机毕业设计ssm+vue灰灰宠物美容网站
    使用Jenkins部署Git仓库微服务项目
    animate.css
  • 原文地址:https://blog.csdn.net/qq_39400324/article/details/125914496