• ceph学习笔记


    ceph

    ceph osd lspools
    
    
    rbd ls -p testpool
    
    
    # 查看 ceph 集群中有多少个 pool,以及每个 pool 的容量及利用情况
    rados df
    
    
    ceph -s
    
    
    ceph osd tree
    ceph df
    
    ceph versions
    
    ceph osd pool ls
    
    ceph osd crush rule dump
    
    ceph auth print-key client.admin
    
    ceph orch host ls
    
    ceph crash ls
    
    ceph osd pool stats
    
    ceph df detail
    ceph osd stat
    
    ceph mon stat
    
    查看 kube 存储池中的 rbd image
    rbd ls -p kube
    
    ceph osd df
    ceph osd pool autoscale-status
    
    
    ceph: 
    10.240.62.11/12/13
    root:autelceph2  
    
    
    用户名:autel
    密码:Autonomy@Autel
    
    13 Autel#3913
    
    [root@ceph-admin ~]# ceph mgr services
    {
        "dashboard": "https://10.250.53.152:8443/",
        "prometheus": "http://10.250.53.152:9283/"
    }
    
    
    
    kubectl logs -f qinzhao-cache-resunet-demo-pipeline-wbkkh-2890309351 -n qinzhao -c lustre-importer-preload
    
    kubectl get nodes "-o=custom-columns=NAME:.metadata.name,GPU:.status.allocatable.nvidia\.com/gpu"
    
    apps/jupyter/jupyter-web-app/upstream/base/configs/spawner_ui_config.yaml
    
     kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio | kubectl apply -f -
    
    kustomize build  apps/tensorboard/tensorboard-controller/upstream/overlays/kubeflow | kubectl apply -f -
    
    kubectl get pods -n kubeflow -l kustomize.component=profiles
    
    https://www.amazonaws.cn/ec2/instance-types/
    
    kubectl taint node autel-poweredge-r750 nodetype=T4:NoExecute
    kubectl taint node autelrobotics-gpu10 nodetype=RTX3090:NoExecute
    autelrobotics-gpu10
    kubectl taint node autel-poweredge-r750 nodetype:NoExecute-
    
     kubectl taint node autelrobotics-gpu09 nodetype:NoSchedule-
     
      kubectl taint node autelrobotics-gpu09 nodetype:NoExecute-
      
      nodegroup=gpu:NoSchedule
      
      kubectl taint nodes autelrobotics-gpu02 nodegroup=gpu:NoSchedule
      
      kubectl label node autelrobotics-gpu02 gputype=A40
      
      lsof -n -P -i:22
      strace
    
    kubectl get csinode
    查看指定进程(此处 PID 为 1)的线程及其活跃情况
    top -H -p 1
    
    kubectl create secret tls ai-tls \
        --namespace ai-test \
        --key tls.key \
        --cert tls.pem
    
    https://github.com/NVIDIA/nvidia-docker/issues/1678
    nvidia-container-cli -k -d /dev/tty info
    
    
    ls -l /dev/char
    cat /etc/nvidia-container-runtime/config.toml
    
    stat -fc %T /sys/fs/cgroup/
    
    sar -n TCP,ETCP 1
    
     fdisk -l
     ldd
     
     # 修改后,重新挂载生效
    # mount -o remount /dev/shm
    
    nstat
    mpstat -P ALL 1
    slabtop
    pcstat
    
    netstat -ant | awk '{print $6}' | sort | uniq -c | sort -n
    
    dmesg -T
    pmap -x  1649 | sort -k 3 -n -r
    cat /proc/1649/smaps | grep 7f4250021000
    在 gdb 中(gdb -p <pid>)导出指定地址范围的内存:
    dump memory memory.dump 0x7f2340539000 0x7f235d553000
    
    strings memory.dump
    
    pidstat -p pid -r 1 1000
    sudo ./stackcount ip_output
    
    dmesg -Tw
    perf
    NetHogs
    
    iftop -i eth0 -P -N
    ./opensnoop -Tn snmp-pass
    slabtop
    
    nfsstat -c
    
    du -ah --max-depth=2 /var/log |sort -rh |head -10
    
    ./fileslower
    ulimit -a
    
    
    解决显存释放问题:
    fuser -v /dev/nvidia*
    
     lsof -Pni
     
     netstat -n | awk '/^tcp/ {++S[$NF]} END {for(a in S) print a, S[a]}'
     
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159

    在这里插入图片描述

    NFS运维:

    systemctl status rpcbind nfs-server
    nfsiostat
    dmesg | grep nfs
    exportfs -v
    mpstat -P ALL 1
    ss -t -a |grep "IP"
    
    nfsstat -c
    iostat 
    
    iostat -d -x -k 1
    
    netstat -an | grep "IP:2049"
    
    dstat
    ps aux | grep /app
    
    https://learnku.com/articles/39851
    https://zhuanlan.zhihu.com/p/614314627
    
     fdisk -l
     blkid
     
     nfsiostat  1
     
     sar -b 1
     
     iostat -m -d /dev/md0 1
     
     strace -p pid 跟踪进程当前正在执行的系统调用,排查死循环或者卡顿时极为有用
    strace -e trace=open,openat /usr/local/kk-mail/service/dovecot/sbin/dovecot  跟踪该程序运行过程中打开了哪些文件
    
    cat /proc/715765/task/*/stack
    
    /proc/12544/task/12873/stack
    systemtap
    
    cat /var/log/Xorg.0.log |grep -i "nvidia"
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39

    ss

    sasfa

  • 相关阅读:
    RTSP/Onvif安防视频平台EasyNVR级联至EasyNVS系统不显示通道,是什么原因?
    OpenCV官方教程中文版 —— 直方图的计算,绘制与分析
    ABAP bgRFC
    创建图形 | 零代码批量创建地理缓冲区图形
    提升自媒体影音创作效率,这 10 款 AI 工具打工人必备!
    Apache Doris 发展历程、技术特性及云原生时代的未来规划
    深入理解树状数组
    软件测试的行业现状,我们的未来在哪里?
    IDEA日志操作
    anaconda+pytorch安装+pycharm环境配置
  • 原文地址:https://blog.csdn.net/qq_21816375/article/details/134457019