目录
- class LocalizationMonitor : public RecurrentRunner {  // Monitor that maps the localization fusion status onto a component status.
- public:
- LocalizationMonitor();
- void RunOnce(const double current_time) override;  // One monitoring pass; the definition holds the status mapping.
- };
-
- // Runs one monitoring pass: reads the latest LocalizationStatus message and
- // translates its fusion_status into the "other_status" of the localization
- // component. `current_time` is not used by this pass.
- void LocalizationMonitor::RunOnce(const double current_time) {
- auto manager = MonitorManager::Instance();
- auto* component = apollo::common::util::FindOrNull(
- *manager->GetStatus()->mutable_components(),
- FLAGS_localization_component_name);
- if (component == nullptr) {
- // localization is not monitored in current mode, skip.
- return;
- }
-
- // The reader is a function-local static: it is created on the first call
- // and reused by every later call.
- static auto reader =
- manager->CreateReader<LocalizationStatus>(FLAGS_localization_msf_status);
- reader->Observe();
- const auto status = reader->GetLatestObserved();
-
- // Start from a cleared status so the result reflects only this pass.
- ComponentStatus* component_status = component->mutable_other_status();
- component_status->clear_status();
- if (status == nullptr) {
- // No message has been observed: reported as ERROR.
- SummaryMonitor::EscalateStatus(ComponentStatus::ERROR,
- "No LocalizationStatus received",
- component_status);
- return;
- }
-
- // Translate LocalizationStatus to ComponentStatus. Note that ERROR and FATAL
- // will trigger safety mode in current settings.
- // Mapping: OK -> OK, WARNNING -> WARN, ERROR -> WARN,
- // CRITICAL_ERROR -> ERROR, FATAL_ERROR -> FATAL.
- switch (status->fusion_status()) {
- case MeasureState::OK:
- SummaryMonitor::EscalateStatus(ComponentStatus::OK, "", component_status);
- break;
- case MeasureState::WARNNING:
- SummaryMonitor::EscalateStatus(
- ComponentStatus::WARN,
- absl::StrCat("WARNNING: ", status->state_message()),
- component_status);
- break;
- case MeasureState::ERROR:
- // MeasureState::ERROR is reported as WARN, not ERROR.
- SummaryMonitor::EscalateStatus(
- ComponentStatus::WARN,
- absl::StrCat("ERROR: ", status->state_message()), component_status);
- break;
- case MeasureState::CRITICAL_ERROR:
- SummaryMonitor::EscalateStatus(
- ComponentStatus::ERROR,
- absl::StrCat("CRITICAL_ERROR: ", status->state_message()),
- component_status);
- break;
- case MeasureState::FATAL_ERROR:
- SummaryMonitor::EscalateStatus(
- ComponentStatus::FATAL,
- absl::StrCat("FATAL_ERROR: ", status->state_message()),
- component_status);
- break;
- default:
- // Any other enum value is logged through AFATAL.
- AFATAL << "Unknown fusion_status: " << status->fusion_status();
- break;
- }
- }
## Check MSF Localization Status

We provide a simple way to check the lidar localization, GNSS localization and fusion localization status. There are four states for the localization status: {NOT_VALID, NOT_STABLE, OK, VALID}. You can simply run `rostopic echo /apollo/localization/msf_status` to check it. If fusion_status is VALID or OK, the output of MSF localization is reliable.
上述是apollo MSF 定位状态的判断逻辑,上述故障都是由业务模块定位部分设置并发出的。
下面是modules/localization/rtk/rtk_localization.cc的状态检测部分
- void RTKLocalization::FillLocalizationStatusMsg(
- const drivers::gnss::InsStat &status,
- LocalizationStatus *localization_status) {
- apollo::common::Header *header = localization_status->mutable_header();
- double timestamp = apollo::cyber::Clock::NowInSeconds();
- header->set_timestamp_sec(timestamp);
- localization_status->set_measurement_time(status.header().timestamp_sec());
-
- if (!status.has_pos_type()) {
- localization_status->set_fusion_status(MeasureState::ERROR);
- localization_status->set_state_message(
- "Error: Current Localization Status Is Missing.");
- return;
- }
- class CameraMonitor : public RecurrentRunner {  // Monitor that reports whether exactly one camera is publishing.
- public:
- CameraMonitor();
- void RunOnce(const double current_time) override;  // One monitoring pass; delegates to UpdateStatus.
-
- private:
- static void UpdateStatus(ComponentStatus* status);  // Clears `status` and fills it with the camera check result.
- };
- void CameraMonitor::RunOnce(const double current_time) {  // One monitoring pass for the camera component; current_time is unused here.
- auto* manager = MonitorManager::Instance();
- auto* component = apollo::common::util::FindOrNull(
- *manager->GetStatus()->mutable_components(), FLAGS_camera_component_name);
- if (component == nullptr) {
- // camera is not monitored in current mode, skip.
- return;
- }
- auto* status = component->mutable_other_status();  // The result is written to the component's other_status field.
- UpdateStatus(status);
- }
除了判断 camera 是否被配置为受监控的组件之外,核心逻辑都在 UpdateStatus 函数中:
- void CameraMonitor::UpdateStatus(ComponentStatus* status) {  // Scans every topic in camera_topic_set and reports ERROR unless exactly one camera is seen.
- status->clear_status();
- std::string frame_id = "";  // frame_id of the first camera message found; empty means none yet.
- for (const auto& topic : camera_topic_set) {
- const auto& reader_message_pair = CreateReaderAndLatestsMessage(topic);  // Helper defined outside this excerpt; used here as a (reader, message) pair.
- const auto& reader = reader_message_pair.first;
- const auto& message = reader_message_pair.second;
- if (reader != nullptr && message != nullptr) {
- if (frame_id.empty()) {
- const auto& header = message->header();
- if (header.has_frame_id()) {  // A message without a frame_id leaves frame_id empty, so it is not counted as a camera.
- frame_id = header.frame_id();
- }
- } else {  // A camera was already recorded and another topic also has a message.
- SummaryMonitor::EscalateStatus(
- ComponentStatus::ERROR,
- absl::StrCat("Only one camera is permitted"), status);
- }
- }
- }
- if (frame_id.empty()) {
- SummaryMonitor::EscalateStatus(
- ComponentStatus::ERROR, absl::StrCat("No camera is detected"), status);
- } else {  // NOTE(review): presumably an ERROR escalated in the loop is kept, since EscalateStatus only raises severity — confirm.
- SummaryMonitor::EscalateStatus(
- ComponentStatus::OK, absl::StrCat("Detected one camera: ", frame_id),
- status);
- }
- }
- // Camera image topics scanned by CameraMonitor::UpdateStatus.
- static const auto camera_topic_set = std::set<std::string>{
- FLAGS_image_long_topic, FLAGS_camera_image_long_topic,
- FLAGS_camera_image_short_topic, FLAGS_camera_front_6mm_topic,
- FLAGS_camera_front_6mm_2_topic, FLAGS_camera_front_12mm_topic,
- // Add more cameras here if you want to monitor.
- };
absl::StrCat("Only one camera is permitted"), status);
如果frame id 是 empty,就报ERROR
ComponentStatus::ERROR, absl::StrCat("No camera is detected"), status);
- // Check if we need to switch to safe mode, and then
- // 1. Notify driver to take action.
- // 2. Trigger Guardian if no proper action was taken.
- class FunctionalSafetyMonitor : public RecurrentRunner {
- public:
- FunctionalSafetyMonitor();
- // Runs one safety-check pass. `current_time` is stored as the safety-mode
- // trigger time and later compared against it to decide on an emergency stop.
- // Marked `override` like the other RecurrentRunner monitors, so a signature
- // mismatch with the base class becomes a compile error.
- void RunOnce(const double current_time) override;
-
- private:
- // Returns true when no safety action is needed (see the definition).
- bool CheckSafety();
- };
- void FunctionalSafetyMonitor::RunOnce(const double current_time) {  // Updates passenger message, safety-mode trigger time and the emergency-stop flag from CheckSafety().
- auto* system_status = MonitorManager::Instance()->GetStatus();
- // Everything looks good or has been handled properly.
- if (CheckSafety()) {
- system_status->clear_passenger_msg();  // Safe: reset all three safety-mode fields.
- system_status->clear_safety_mode_trigger_time();
- system_status->clear_require_emergency_stop();
- return;
- }
- if (system_status->require_emergency_stop()) {
- // EStop has already been triggered.
- return;
- }
-
- // Newly entered safety mode.
- system_status->set_passenger_msg("Error! Please disengage.");
- if (!system_status->has_safety_mode_trigger_time()) {
- system_status->set_safety_mode_trigger_time(current_time);  // First unsafe pass only records the time; the timeout is checked on later passes.
- return;
- }
-
- // Trigger EStop if no action was taken in time.
- if (system_status->safety_mode_trigger_time() +
- FLAGS_safety_mode_seconds_before_estop <
- current_time) {  // Assumes current_time is in seconds, matching the flag name — TODO confirm.
- system_status->set_require_emergency_stop(true);
- }
- }
- bool FunctionalSafetyMonitor::CheckSafety() {  // Returns false as soon as one safety-required module or component is not safe; true otherwise.
- // We only check safety in self driving mode.
- auto manager = MonitorManager::Instance();
- if (!manager->IsInAutonomousMode()) {
- return true;
- }
-
- // Check HMI modules status.
- const auto& mode = manager->GetHMIMode();
- const auto& hmi_modules = manager->GetStatus()->hmi_modules();
- for (const auto& iter : mode.modules()) {
- const std::string& module_name = iter.first;
- const auto& module = iter.second;
- if (module.required_for_safety() &&
- !IsSafe(module_name, hmi_modules.at(module_name))) {  // IsSafe is defined outside this excerpt. NOTE(review): at() assumes a status entry exists for every module of the mode — confirm.
- return false;
- }
- }
-
- // Check monitored components status.
- const auto& components = manager->GetStatus()->components();
- for (const auto& iter : mode.monitored_components()) {
- const std::string& component_name = iter.first;
- const auto& component = iter.second;
- if (component.required_for_safety() &&
- !IsSafe(component_name, components.at(component_name).summary())) {  // Components are judged by their summary status.
- return false;
- }
- }
-
- // Everything looks good.
- return true;
- }
recorder monitor 是 Apollo 对记录服务(recorder)的监控,方法是通过订阅 /apollo/data/recorder/status 这个 topic 获取 Recorder status。
- class RecorderMonitor : public RecurrentRunner {  // Monitor that maps the smart recorder's recording state onto a component status.
- public:
- RecorderMonitor();
- void RunOnce(const double current_time) override;  // One monitoring pass; the definition holds the state mapping.
- };
-
- // Runs one monitoring pass: reads the latest SmartRecorderStatus message and
- // translates its recording_state into the "other_status" of the smart
- // recorder component. `current_time` is not used by this pass.
- void RecorderMonitor::RunOnce(const double current_time) {
- auto manager = MonitorManager::Instance();
- auto* component = apollo::common::util::FindOrNull(
- *manager->GetStatus()->mutable_components(),
- FLAGS_smart_recorder_component_name);
- if (component == nullptr) {
- // SmartRecorder is not monitored in current mode, skip.
- return;
- }
-
- // The reader is a function-local static: it is created on the first call
- // and reused by every later call.
- static auto reader =
- manager->CreateReader<SmartRecorderStatus>(FLAGS_recorder_status_topic);
- reader->Observe();
- const auto status = reader->GetLatestObserved();
-
- // Start from a cleared status so the result reflects only this pass.
- ComponentStatus* component_status = component->mutable_other_status();
- component_status->clear_status();
- if (status == nullptr) {
- // No message has been observed: reported as ERROR.
- SummaryMonitor::EscalateStatus(ComponentStatus::ERROR,
- "No SmartRecorderStatus received",
- component_status);
- return;
- }
-
- // Translate SmartRecorderStatus to ComponentStatus. Note that ERROR and FATAL
- // will trigger safety mode in current settings.
- // Mapping: RECORDING -> OK, TERMINATING -> WARN, STOPPED -> OK.
- switch (status->recording_state()) {
- case RecordingState::RECORDING:
- SummaryMonitor::EscalateStatus(ComponentStatus::OK, "", component_status);
- break;
- case RecordingState::TERMINATING:
- SummaryMonitor::EscalateStatus(
- ComponentStatus::WARN,
- absl::StrCat("WARNNING: ", status->state_message()),
- component_status);
- break;
- case RecordingState::STOPPED:
- // A stopped recorder is still reported as OK, with the state message.
- SummaryMonitor::EscalateStatus(
- ComponentStatus::OK,
- absl::StrCat("STOPPED: ", status->state_message()), component_status);
- break;
- default:
- // Any other enum value is logged through AFATAL.
- AFATAL << "Unknown recording status: " << status->recording_state();
- break;
- }
- }
第一步依旧是判断recorder 是不是被配置的监控模块,如果不是直接返回。
然后就是直接判断status->recording_state(),如果是RecordingState::TERMINATING(终止)状态就报出一个WARNING 的故障
- enum RecordingState {  // Recorder states carried in SmartRecorderStatus.
- STOPPED = 0;
- RECORDING = 1;
- TERMINATING = 2;
- }
-
- message SmartRecorderStatus {  // Status message read by RecorderMonitor.
- optional apollo.common.Header header = 1;
- optional RecordingState recording_state = 2;  // Switched on by RecorderMonitor::RunOnce.
- optional string state_message = 3;  // Appended to the component status message.
- }
modules/data/tools/smart_recorder/realtime_record_processor.cc
我们可以在上述文件中找到recorder状态赋值情况,但是可惜apollo 中目前没有一个模块会主动填写RecordingState::TERMINATING(终止)状态。
- // A monitor which summarizes other monitors' results and publishes the whole status
- // if it has changed or the publish interval has elapsed.
- class SummaryMonitor : public RecurrentRunner {
- public:
- SummaryMonitor();
- void RunOnce(const double current_time) override;  // One pass: escalate per-component summaries, then publish if needed.
-
- // Escalate the status to a higher priority new status:
- // FATAL > ERROR > WARN > OK > UNKNOWN.
- static void EscalateStatus(const ComponentStatus::Status new_status,
- const std::string& message,
- ComponentStatus* current_status);
-
- private:
- size_t system_status_fp_ = 0;  // Hash of the serialized status at the last publish.
- double last_broadcast_ = 0;  // current_time value at the last publish.
- };
-
-
- // Runs one summary pass: folds every per-component sub-status into that
- // component's summary, then publishes the whole system status when its
- // fingerprint changed or the publish interval has elapsed.
- void SummaryMonitor::RunOnce(const double current_time) {
- auto manager = MonitorManager::Instance();
- auto* status = manager->GetStatus();
- // Escalate the summary status to the most severe one.
- for (auto& component : *status->mutable_components()) {
- auto* summary = component.second.mutable_summary();
- const auto& process_status = component.second.process_status();
- EscalateStatus(process_status.status(), process_status.message(), summary);
- const auto& module_status = component.second.module_status();
- EscalateStatus(module_status.status(), module_status.message(), summary);
- const auto& channel_status = component.second.channel_status();
- EscalateStatus(channel_status.status(), channel_status.message(), summary);
- const auto& resource_status = component.second.resource_status();
- EscalateStatus(resource_status.status(), resource_status.message(),
- summary);
- const auto& other_status = component.second.other_status();
- EscalateStatus(other_status.status(), other_status.message(), summary);
- }
-
- // Get fingerprint of current status.
- // Don't use DebugString() which has known bug on Map field. The string
- // doesn't change though the value has changed.
- static std::hash<std::string> hash_fn;
- std::string proto_bytes;
- status->SerializeToString(&proto_bytes);
- const size_t new_fp = hash_fn(proto_bytes);
-
- if (system_status_fp_ != new_fp ||
- current_time - last_broadcast_ > FLAGS_system_status_publish_interval) {
- // The writer is a function-local static: created on the first publish only.
- // NOTE(review): the template argument was lost in this excerpt and is
- // restored here as SystemStatus — confirm against the upstream source.
- static auto writer =
- manager->CreateWriter<SystemStatus>(FLAGS_system_status_topic);
-
- apollo::common::util::FillHeader("SystemMonitor", status);
- writer->Write(*status);
- // The header is removed again after publishing; presumably so that it does
- // not affect the fingerprint computed on the next pass.
- status->clear_header();
- system_status_fp_ = new_fp;
- last_broadcast_ = current_time;
- }
- }
SummaryMonitor 对前面所有 monitor 上报的故障信息进行整合,然后发布到 /apollo/monitor/system_status 这个 topic 上。