• 单节点大数据平台运维脚本


    单节点大数据集群的运维脚本,支持 start(启动)、status(查看状态)、stop(停止)三个参数。

    vi /opt/bash/bigdata-operate-script.sh
    
    • 1
    #!/bin/bash
    
    
    # Pull in the user's and the system-wide shell environment first.
    source ~/.bashrc
    source /etc/profile

    # Host name used to build every service endpoint below.
    hostname=bigdata

    # Number of jobs expected on YARN.
    job_num_count=12
    # Start flag for the monitor script; counts the components found healthy.
    m_flag=0

    # Components the application needs in order to run.
    important_components=(
        "kafka"
        "clickhouse-server"
        "elasticsearch"
        "kibana"
    )
    # Optional (monitoring) components.
    monitor_components=(
        "prometheus"
        "node_exporter"
        "pushgateway"
        "kafka_exporter"
        "grafana"
    )

    # Kafka cluster address.
    KAFKA_BROKER="${hostname}:9092"
    # Elasticsearch API endpoint.
    ES_API="http://${hostname}:9200"
    # Kibana status API endpoint.
    KIBANA_API="http://${hostname}:5601/api/status"

    # Basic-auth credentials, if required.
    ES_USER="ES用户名"
    ES_PASS="ES密码"
    
    # Start the systemd unit named by $1 unless it is already active.
    # Prints "start <unit>" before starting; prints nothing when the unit is up.
    function start_component() {
        local unit_name=$1

        # Already active: nothing to do.
        systemctl is-active --quiet "$unit_name" && return

        echo "start $unit_name"
        sudo systemctl start "$unit_name"
    }
    
    # Probe every managed service and print one status line per service.
    #
    # Globals read:
    #   KAFKA_BROKER, ES_API, KIBANA_API, ES_USER, ES_PASS, monitor_components
    #   JPS_BIN, JQ_BIN - optional overrides for the jps / jq executables
    # Globals written:
    #   m_flag - reset to 0, then incremented once for each of the six core
    #            services (HDFS, YARN, Kafka, ClickHouse, Elasticsearch, Kibana)
    #            that passes its check; monitoring services are not counted.
    # Outputs:
    #   Status lines on stdout.
    function check_status() {
        local jps_bin=${JPS_BIN:-/usr/java/jdk1.8.0_212/bin/jps}
        local jq_bin=${JQ_BIN:-/usr/local/bin/jq}
        local jvm_procs kibana_status component

        # Start from zero so a repeated call reports only the current state.
        m_flag=0

        # One jps snapshot serves both the HDFS and the YARN daemon checks.
        jvm_procs=$("$jps_bin")

        # HDFS: both daemons listed and the root directory can be listed.
        if grep -q NameNode <<<"$jvm_procs" && grep -q DataNode <<<"$jvm_procs" && hadoop fs -ls / >/dev/null 2>&1; then
            # Safe mode is reported but not counted as healthy.
            if hdfs dfsadmin -safemode get | grep -q ON; then
                echo "HDFS in safe mode"
            else
                echo "========HDFS is Running========"
                m_flag=$((m_flag + 1))
            fi
        else
            echo "--------HDFS is Not Running--------"
        fi

        # YARN: both daemons listed and the node list can be fetched.
        if grep -q ResourceManager <<<"$jvm_procs" && grep -q NodeManager <<<"$jvm_procs" && yarn node -list >/dev/null 2>&1; then
            echo "========YARN is Running========"
            m_flag=$((m_flag + 1))
        else
            echo "--------YARN is Not Running--------"
        fi

        # Kafka: the broker answers a topic listing.
        if kafka-topics.sh --bootstrap-server "$KAFKA_BROKER" --list >/dev/null 2>&1; then
            echo "========Kafka cluster is Running========"
            m_flag=$((m_flag + 1))
        else
            echo "--------Kafka cluster is Not Running--------"
        fi

        # ClickHouse: unit active and a trivial query succeeds.
        if systemctl is-active --quiet clickhouse-server && clickhouse-client --port 9123 --user clickhouse --password clickhouse密码 --query "SELECT 1" >/dev/null 2>&1; then
            echo "========ClickHouse-server service is Running========"
            m_flag=$((m_flag + 1))
        else
            echo "--------ClickHouse-server service is Not Running--------"
        fi

        # Elasticsearch: unit active and the HTTP API answers.
        if systemctl is-active --quiet elasticsearch && curl -s -u "$ES_USER:$ES_PASS" "$ES_API" >/dev/null 2>&1; then
            echo "========Elasticsearch service is Running========"
            m_flag=$((m_flag + 1))
        else
            echo "--------Elasticsearch service is Not Running--------"
        fi

        # Kibana: unit active and the status API reports overall state "green".
        kibana_status=$(curl -s -u "$ES_USER:$ES_PASS" "$KIBANA_API" | "$jq_bin" -r '.status.overall.state')
        if systemctl is-active --quiet kibana && [ "$kibana_status" = "green" ]; then
            echo "========Kibana service is Running========"
            m_flag=$((m_flag + 1))
        else
            echo "--------Kibana service is Not Running--------"
        fi

        # Monitoring services: report only, they do not affect m_flag.
        for component in "${monitor_components[@]}"; do
            if systemctl is-active --quiet "$component"; then
                echo "========$component service is Running========"
            else
                echo "--------$component service is Not Running--------"
            fi
        done
    }
    
    # Entry point: dispatch on the first argument (start | status | stop).
    jps_cmd=/usr/java/jdk1.8.0_212/bin/jps
    jq_cmd=/usr/local/bin/jq
    hadoop_sbin=/opt/module/hadoop-3.3.5/sbin
    yarn_metrics_url="http://$hostname:8088/ws/v1/cluster/metrics"

    case "$1" in
    start)
        # Start HDFS only when no NameNode is listed and both HDFS probes fail.
        if ! { "$jps_cmd" | grep -q NameNode ||
            hdfs dfsadmin -report >/dev/null 2>&1 ||
            hadoop fs -ls / >/dev/null 2>&1; }; then
            echo "start HDFS"
            /bin/bash "$hadoop_sbin/start-dfs.sh"
        fi

        # Start YARN only when neither the ResourceManager nor the NodeManager is listed.
        if ! { "$jps_cmd" | grep -q ResourceManager || "$jps_cmd" | grep -q NodeManager; }; then
            echo "start Yarn"
            /bin/bash "$hadoop_sbin/start-yarn.sh"
        fi

        # Required components first, then a pause before the monitoring ones.
        for svc in "${important_components[@]}"; do
            start_component "$svc"
        done

        sleep 30

        for svc in "${monitor_components[@]}"; do
            start_component "$svc"
        done

        sleep 20

        # Check the state of every component.
        check_status

        # Launch the jobs only when all six core components are healthy and
        # the number of running YARN applications differs from the expected count.
        if [ "$m_flag" -eq 6 ]; then
            running_apps=$(curl -s "$yarn_metrics_url" | "$jq_cmd" .clusterMetrics.appsRunning)
            if ! [ "$running_apps" -eq "$job_num_count" ]; then
                /bin/bash /home/bigdata/job/dw3/monitor.sh
            fi
        fi
        ;;

    status)
        check_status
        ;;

    stop)
        if "$jps_cmd" | grep -q ResourceManager && "$jps_cmd" | grep -q NodeManager; then
            # Kill all applications.
            /bin/bash /home/bigdata/killAllApp.sh
            # Shut the stack down only once YARN reports no running application.
            running_apps=$(curl -s "$yarn_metrics_url" | "$jq_cmd" .clusterMetrics.appsRunning)
            if [ "$running_apps" -eq 0 ]; then
                # Stop all components.
                /bin/bash "$hadoop_sbin/stop-yarn.sh"
                /bin/bash "$hadoop_sbin/stop-dfs.sh"
                for svc in "${important_components[@]}"; do
                    sudo systemctl stop "$svc"
                done
            else
                echo "Yarn上程序未停完,需手动停止"
            fi
        fi

        # Monitoring components are stopped regardless of the YARN outcome.
        for svc in "${monitor_components[@]}"; do
            systemctl is-active --quiet "$svc" || continue
            echo "========stop $svc service========"
            sudo systemctl stop "$svc"
        done
        check_status
        ;;
    *)
        # Unknown or missing argument: print the usage text.
        printf '%s\n' \
            "请输入合法的参数" \
            "  start   启动所有组件集群" \
            "  status  查看所有组件集群状态" \
            "  stop    停止所有组件集群"
        ;;
    esac
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173

    终止 YARN 上所有名称中带有 ETL 的任务

    vi /home/bigdata/killAllApp.sh
    
    • 1
    #!/bin/bash
    # Kill every YARN application whose listing line contains "ETL", then print
    # the application list that remains.

    # First column of each matching line of the listing is the application ID.
    mapfile -t etl_app_ids < <(yarn application -list | awk '/ETL/ {print $1}')

    # Only call -kill when there is something to kill; with no ID the command
    # would be invoked without its required argument.
    if ((${#etl_app_ids[@]} > 0)); then
        yarn application -kill "${etl_app_ids[@]}"
    fi

    printf '\033[1;33m============Yarn Application List============\033[0m\n'
    yarn application -list
    
    • 1
    • 2
    • 3
    • 4
  • 相关阅读:
    钉钉h5微应用调试 整理
    C++设计模式_13_Flyweight享元模式
    代码随想录算法训练营 Day35 贪心算法4
    应用软件漏洞排名
    第21章 Spring事务管理之扩展篇(一)
    力扣刷题:正则表达式匹配、
    React函数式写法和类式写法的区别(以一个计数器功能为例子)
    两万字长文世界编程语言大串讲
    NR PDSCH (二)频域资源
    Spring Boot中使用MongoDB完成数据存储
  • 原文地址:https://blog.csdn.net/xfp1007907124/article/details/136303135