• PX4模块设计之十六:Hardfault模块


    1. Hardfault模块初始化

    nsh_main之前的启动过程可参考:PX4模块设计之十:PX4启动过程。Hardfault是通过启动脚本进行启动的。

    注:这个模块主要使用了电池供电情况下SRAM持续保持数据的特性,从而保证系统异常宕机后,再次重启仍能获取宕机时的异常情况(通常这种临界异常无法保存在日志中)。

    nsh_main
     └──> nsh_initialize
         └──> boardctl(BOARDIOC_INIT, 0)
             └──> board_app_initialize
                 └──> board_hardfault_init
                     └──> stm32_bbsraminitialize(BBSRAM_PATH, filesizes);  // "/fs/bbr", The path to the Battery Backed up SRAM
                         └──> stm32_bbsram_probe
                             └──> register_driver(devname, &stm32_bbsram_fops, 0666, &g_bbsram[i]);
    
    
    /* File operations table for the battery-backed SRAM (BBSRAM) driver.
     * stm32_bbsram_probe hands the address of this table to register_driver,
     * so file calls made on a registered BBSRAM device node are routed to
     * the stm32_bbsram_* handlers listed here.
     */
    static const struct file_operations stm32_bbsram_fops =
    {
      .open   = stm32_bbsram_open,
      .close  = stm32_bbsram_close,
      .read   = stm32_bbsram_read,
      .write  = stm32_bbsram_write,
      .seek   = stm32_bbsram_seek,
      .ioctl  = stm32_bbsram_ioctl,
      .poll   = stm32_bbsram_poll,
    /* The unlink handler is only present when pseudo-filesystem operations
     * are not disabled; hardfault_rearm calls unlink() on HARDFAULT_PATH.
     */
    #ifndef CONFIG_DISABLE_PSEUDOFS_OPERATIONS
      .unlink = stm32_bbsram_unlink,
    #endif
    };
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23

    启动脚本类似如下:

    # Handle a stored hardfault only when an SD card is available
    # ("commit" writes the fault to /fs/microsd/fault_%i.txt).
    if [ $SDCARD_AVAILABLE = yes ]
    then
    	# "check": check if there is an uncommitted hardfault.
    	# NOTE(review): presumably exits with success when one exists - confirm.
    	if hardfault_log check
    	then
    		set STARTUP_TUNE 2 # tune 2 = ERROR_TUNE
    		# "commit": write the uncommitted hardfault to the SD card and rearm.
    		if hardfault_log commit
    		then
    			# "reset": reset the reboot counter once the fault has been saved.
    			hardfault_log reset
    		fi
    	fi
    fi
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11

    2. Hardfault模块主程序

    hardfault_log_main
     ├──> [命令 check]
     │   └──> hardfault_check_status
     ├──> [命令 rearm]
     │   └──> hardfault_rearm
     ├──> [命令 fault]
     │   └──> genfault
     ├──> [命令 commit]
     │   └──> hardfault_commit
     ├──> [命令 count]
     │   └──> hardfault_increment_reboot false
     └──> [命令 reset]
         └──> hardfault_increment_reboot true
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13

    3. Hardfault命令

    hardfault_log <command> [arguments...]
     Commands:
       check         Check if there's an uncommitted hardfault
    
       rearm         Drop an uncommitted hardfault
    
       fault         Generate a hardfault (this command crashes the system :)
         [0|1]       Hardfault type: 0=divide by 0, 1=Assertion (default=0)
    
       commit        Write uncommitted hardfault to /fs/microsd/fault_%i.txt (and
                     rearm, but don't reset)
    
       count         Read the reboot counter, counts the number of reboots of an
                     uncommitted hardfault (returned as the exit code of the
                     program)
    
       reset         Reset the reboot counter
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17

    3.1 hardfault_check_status

    检查是否有未提交的hardfault记录,如果有就打印输出。

    hardfault_check_status
     ├──> int fd = hardfault_get_desc(caller, &desc, true);
     ├──> [打开失败,且原因不是“hardfault记录不存在”]
     │   └──> hfsyslog(LOG_INFO, "Failed to open Fault Log file [%s] (%d)\n", HARDFAULT_PATH, ret);
     ├──> [打开失败,原因是hardfault记录不存在(即已处于待命状态)]
     │   └──> hfsyslog(LOG_INFO, "Fault Log is Armed\n");
     └──> [打开成功]
         ├──> int rv = close(fd);
         ├──> [close失败(rv < 0)]
         │   └──> hfsyslog(LOG_INFO, "Failed to Close Fault Log (%d)\n", rv);
         └──> [close成功]
             ├──> hfsyslog(LOG_INFO, "Fault Log info File No %" PRIu8 " Length %" PRIu16 " flags:0x%02" PRIx16 " state:%d\n", desc.fileno, desc.len, desc.flags, state);
             ├──> [记录有效]
             │   ├──> format_fault_time(HEADER_TIME_FMT, &desc.lastwrite, buf, arraySize(buf));
             │   ├──> identify(caller);
             │   ├──> hfsyslog(LOG_INFO, "Fault Logged on %s - Valid\n", buf);
             └──> [记录无效]
                 └──> rv = hardfault_rearm(caller);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18

    3.2 hardfault_rearm

    删除BBSRAM中尚未提交的hardfault数据(通过unlink实现),使故障日志重新进入待命(Armed)状态。

    hardfault_rearm
     ├──> int rv = unlink(HARDFAULT_PATH);
     ├──> [unlink失败(rv < 0)]
     │   └──> hfsyslog(LOG_INFO, "Failed to re arming Fault Log (%d)\n", rv);
     └──> [unlink成功]
         └──> syslog(LOG_INFO, "Fault Log is Armed\n");
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6

    3.3 genfault

    略,根据参数模拟产生一个异常(0=除以0,1=断言失败,默认为0),导致系统宕机。

    3.4 hardfault_commit

    将BBSRAM(由电池供电保持数据的SRAM)中的异常数据转存到SD卡上的故障文件(/fs/microsd/fault_%i.txt),并追加到ULog文件中。

    hardfault_commit
     ├──> ret = hardfault_get_desc(caller, &desc, false);
     ├──> [获取描述失败(ret < 0)]
     │   └──> return
     ├──> state = (desc.lastwrite.tv_sec || desc.lastwrite.tv_nsec) ?  OK : 1;
     ├──> int rv = close(fd);
     ├──> [close失败(rv < 0)]
     │   ├──> hfsyslog(LOG_INFO, "Failed to Close Fault Log (%d)\n", rv);
     │   └──> return
     ├──> [state != OK,即没有已写入的异常数据]
     │   ├──> hfsyslog(LOG_INFO, "Nothing to save\n");
     │   ├──> ret = -ENOENT;
     │   └──> return
     ├──> format_fault_file_name(&desc.lastwrite, path, arraySize(path));
     ├──> hardfault_write(caller, fdout, HARDFAULT_FILE_FORMAT, true);  //该函数内部将会unlink hardfault文件
     └──> hardfault_append_to_ulog(caller, fdout);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16

    3.5 hardfault_increment_reboot

    维护重启计数:count命令(参数为false)将计数加1,reset命令(参数为true)将计数清零,结果均写回BBSRAM设备0。

    hardfault_increment_reboot
     ├──> int fd = open(HARDFAULT_REBOOT_PATH, O_RDWR | O_CREAT);
     ├──> [打开失败(fd < 0)]
     │   ├──> hfsyslog(LOG_INFO, "Failed to open Fault reboot count file [%s] (%d)\n", HARDFAULT_REBOOT_PATH, ret);
     │   └──> return
     ├──> [参数为false(count命令)]
     │   └──> [count++,并写入BBSRAM设备0]
     └──> [参数为true(reset命令)]
         └──> [count重置0,并写入BBSRAM设备0]
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    4. BBSRAM设备

    总共有5个BBSRAM设备(文件),用途如下:

    1. 【4字节,即sizeof(int)】重启计数
    2. 【384字节】当前飞行参数拷贝A
    3. 【384字节】当前飞行参数拷贝B
    4. 【64字节】当前ULog全路径文件名
    5. 【剩余字节】宕机日志

    注:STM32F7设备默认是4K字节(STM32F7_BBSRAM_SIZE 4096)。

    /* BBSRAM file index and device path used by hardfault_increment_reboot
     * for the reboot counter. The path is BBSRAM_PATH with the index appended.
     * NOTE(review): assumes STRINGIFY expands its argument before
     * stringifying, giving e.g. "/fs/bbr0" - confirm against its definition.
     */
    #define HARDFAULT_REBOOT_FILENO 0
    #define HARDFAULT_REBOOT_PATH BBSRAM_PATH "" STRINGIFY(HARDFAULT_REBOOT_FILENO)
    /* BBSRAM file index and device path for the latest ULog file path */
    #define HARDFAULT_ULOG_FILENO 3
    #define HARDFAULT_ULOG_PATH BBSRAM_PATH "" STRINGIFY(HARDFAULT_ULOG_FILENO)
    /* BBSRAM file index and device path for the hardfault (panic) log itself */
    #define HARDFAULT_FILENO 4
    #define HARDFAULT_PATH BBSRAM_PATH "" STRINGIFY(HARDFAULT_FILENO)
    
    
    /* Size of the backup SRAM for the STM32F74xx/F75xx/F76xx/F77xx
     * configurations; any other configuration is rejected at compile time.
     */
    #if defined(CONFIG_STM32F7_STM32F74XX) || defined(CONFIG_STM32F7_STM32F75XX) || \
        defined(CONFIG_STM32F7_STM32F76XX) || defined(CONFIG_STM32F7_STM32F77XX)
    #  define STM32F7_BBSRAM_SIZE 4096
    #else
    #  error "No backup SRAM on this STM32 Device"
    #endif
    
    #define HARDFAULT_MAX_ULOG_FILE_LEN 64 /* must be large enough to store the full path to the log file */
    
    /* Sizes of the five BBSRAM files, in file-index order (FN0..FN4).
     * A size of -1 means "use the rest of the BBSRAM memory".
     */
    #define BBSRAM_SIZE_FN0 (sizeof(int))
    #define BBSRAM_SIZE_FN1 384     /* greater than 2.5 times the size of vehicle_status_s */
    #define BBSRAM_SIZE_FN2 384     /* greater than 2.5 times the size of vehicle_status_s */
    #define BBSRAM_SIZE_FN3 HARDFAULT_MAX_ULOG_FILE_LEN
    #define BBSRAM_SIZE_FN4 -1
    
    
    /* The path to the Battery Backed up SRAM */
    #define BBSRAM_PATH "/fs/bbr"
    /* The sizes of the files to create (-1) use rest of BBSRAM memory.
     * This is the initializer for the size table passed to
     * stm32_bbsraminitialize; the list ends with a 0 marker.
     * NOTE(review): entry 0 is labelled "Time stamp", yet
     * HARDFAULT_REBOOT_FILENO (0) uses that file for the reboot counter -
     * confirm which description is current.
     */
    #define BSRAM_FILE_SIZES { \
    		BBSRAM_SIZE_FN0,   /* For Time stamp only */                  \
    		BBSRAM_SIZE_FN1,   /* For Current Flight Parameters Copy A */ \
    		BBSRAM_SIZE_FN2,   /* For Current Flight Parameters Copy B */ \
    		BBSRAM_SIZE_FN3,   /* For the latest ULog file path */        \
    		BBSRAM_SIZE_FN4,   /* For the Panic Log use rest of space */  \
    		0                  /* End of table marker */                  \
    	}
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35

    5. 总结

    总体上看,这个BBSRAM设备主要用途就是记录coredump数据,类似系统的一个黑匣子。

    注:这种做法配合ULog日志使用,与之前做基站固件时所做的【基站软件Linux技术验证和分析:黑匣子技术】非常接近,只不过当时还有多级Boot和版本管理回退等底层考虑。

    6. 参考资料

    【1】PX4开源软件框架简明简介
    【2】PX4 Hardfault模块

  • 相关阅读:
    SQL必需掌握的100个重要知识点:用通配符进行过滤
    MySQL主从复制与读写分离
    Events explained
    django计算机毕业设计基于安卓Android/微信小程序的移动电商平台系统APP-商品购物商城app
    压缩感知学习
    1.查找存在于数组x但不存在于数组y的元素np.setdiff1d()2.查找两个数组交集以外(不同时存在于两个数组中)的元素np.setxor1d()
    vue3.x+ts项目创建,配置流程
    微信小程序数据监听器小案例
    Linux磁盘分配 把home的空间扩容给root
    数据结构与算法之Hash&BitMap
  • 原文地址:https://blog.csdn.net/lida2003/article/details/126171977