一、前言
先说一下为什么使用orchestrator而不是MHA作为数据库的高可用组件:MHA组件存在单点问题,而orchestrator组件可以组建集群,不会出现单点问题。
二、搭建
搭建mysql gtid主从
上面的主从配置是为了 orchestrator数据库高可用组件的使用,其中有几个配置不能出错
主从配置中的CHANGE MASTER TO MASTER_HOST='rabbitmq02',必须使用主机名,不能使用ip地址,不然在使用orchestrator组件做mysql故障切换时会出问题
另外,主从同步用户的权限配置也多加了几项,而不是只配置REPLICATION SLAVE权限
搭建orchestrator集群
在github上下载orchestrator的安装包
地址:https://github.com/openark/orchestrator/releases (Releases · openark/orchestrator · GitHub)
这里使用三台主机构建orchestrator集群,使用raft协议实现高可用,允许一台主机宕机
ip | port |
10.1.60.118 | 3306、3001 |
10.1.60.120 | 3306、3001 |
10.1.60.125 | 3306、3001 |
需要先为每台主机安装mysql服务
参考:yum安装mysql 5.7_yum安装mysql5.7_Apex Predator的博客-CSDN博客
创建 orchestrator数据库与用户
mysql -u root -p
create database orchestrator;
create user 'orchestrator'@'%' identified by '11111111';
grant all on orchestrator.* to 'orchestrator'@'%';
flush privileges;
将安装包分别放到三台主机上并解压
mkdir /opt/orchestrator && cd /opt/orchestrator
tar -zxvf orchestrator-3.2.5-linux-amd64.tar.gz && ls
拷贝配置文件(三台主机都需要操作)
cd usr/local/orchestrator/
cp orchestrator-sample.conf.json orchestrator.conf.json
编辑配置文件(三台主机都需要操作)
vi orchestrator.conf.json
- {
- "Debug": true,
- "EnableSyslog": true,
- "ListenAddress": ":3001", #默认使用3000端口,但我这边3000端口已被其他服务占用,所以改成3001端口
- "MySQLTopologyUser": "orchestrator", #配置被监控的主从数据库的用户
- "MySQLTopologyPassword": "11111111", #配置被监控的主从数据库的用户密码
- "MySQLTopologyCredentialsConfigFile": "",
- "MySQLTopologySSLPrivateKeyFile": "",
- "MySQLTopologySSLCertFile": "",
- "MySQLTopologySSLCAFile": "",
- "MySQLTopologySSLSkipVerify": true,
- "MySQLTopologyUseMutualTLS": false,
- "MySQLOrchestratorHost": "10.1.60.125", #配置orchestrator组件服务本机地址
- "MySQLOrchestratorPort": 3306, #配置orchestrator组件服务数据库端口
- "MySQLOrchestratorDatabase": "orchestrator", #配置orchestrator组件数据库
- "MySQLOrchestratorUser": "orchestrator", #配置orchestrator数据库用户
- "MySQLOrchestratorPassword": "11111111", #配置orchestrator数据库用户密码
- "MySQLOrchestratorCredentialsConfigFile": "",
- "MySQLOrchestratorSSLPrivateKeyFile": "",
- "MySQLOrchestratorSSLCertFile": "",
- "MySQLOrchestratorSSLCAFile": "",
- "MySQLOrchestratorSSLSkipVerify": true,
- "MySQLOrchestratorUseMutualTLS": false,
- "MySQLConnectTimeoutSeconds": 1,
- "RaftEnabled": true, #启用raft配置,以下配置默认是没有的,需要手动加上
- "RaftDataDir": "/opt/orchestrator", #配置raft数据存放目录,这里使用orchestrator安装包所在目录
- "RaftBind": "10.1.60.125", #配置orchestrator组件服务本机地址
- "DefaultRaftPort": 10008, #配置raft服务端口
- "RaftNodes": [ #配置orchestrator组件集群地址
- "10.1.60.118",
- "10.1.60.120",
- "10.1.60.125"
- ],
- "DefaultInstancePort": 3306,
- "DiscoverByShowSlaveHosts": true,
- "InstancePollSeconds": 5,
- "DiscoveryIgnoreReplicaHostnameFilters": [
- "a_host_i_want_to_ignore[.]example[.]com",
- ".*[.]ignore_all_hosts_from_this_domain[.]example[.]com",
- "a_host_with_extra_port_i_want_to_ignore[.]example[.]com:3307"
- ],
- "UnseenInstanceForgetHours": 240,
- "SnapshotTopologiesIntervalHours": 0,
- "InstanceBulkOperationsWaitTimeoutSeconds": 10,
- "HostnameResolveMethod": "None", #此处需要更改为None,默认值是default;如果保持默认值,后面通过主机名添加mysql主从时会查找不到
- "MySQLHostnameResolveMethod": "SELECT concat(@@report_host,':',@@port)", #此处也需要更改为report_host模式
- "SkipBinlogServerUnresolveCheck": true,
- "ExpiryHostnameResolvesMinutes": 60,
- "RejectHostnameResolvePattern": "",
- "ReasonableReplicationLagSeconds": 10,
- "ProblemIgnoreHostnameFilters": [],
- "VerifyReplicationFilters": false,
- "ReasonableMaintenanceReplicationLagSeconds": 20,
- "CandidateInstanceExpireMinutes": 60,
- "AuditLogFile": "",
- "AuditToSyslog": false,
- "RemoveTextFromHostnameDisplay": ":3306",
- "ReadOnly": false,
- "AuthenticationMethod": "",
- "HTTPAuthUser": "",
- "HTTPAuthPassword": "",
- "AuthUserHeader": "",
- "PowerAuthUsers": [
- "*"
- ],
- "ClusterNameToAlias": {
- "127.0.0.1": "test suite"
- },
- "ReplicationLagQuery": "",
- "DetectClusterAliasQuery": "SELECT SUBSTRING_INDEX(@@hostname, '.', 1)",
- "DetectClusterDomainQuery": "",
- "DetectInstanceAliasQuery": "",
- "DetectPromotionRuleQuery": "",
- "DataCenterPattern": "[.]([^.]+)[.][^.]+[.]mydomain[.]com",
- "PhysicalEnvironmentPattern": "[.]([^.]+[.][^.]+)[.]mydomain[.]com",
- "PromotionIgnoreHostnameFilters": [],
- "DetectSemiSyncEnforcedQuery": "",
- "ServeAgentsHttp": false,
- "AgentsServerPort": ":3001", #此处也需要更改orchestrator组件默认的端口号
- "AgentsUseSSL": false,
- "AgentsUseMutualTLS": false,
- "AgentSSLSkipVerify": false,
- "AgentSSLPrivateKeyFile": "",
- "AgentSSLCertFile": "",
- "AgentSSLCAFile": "",
- "AgentSSLValidOUs": [],
- "UseSSL": false,
- "UseMutualTLS": false,
- "SSLSkipVerify": false,
- "SSLPrivateKeyFile": "",
- "SSLCertFile": "",
- "SSLCAFile": "",
- "SSLValidOUs": [],
- "URLPrefix": "",
- "StatusEndpoint": "/api/status",
- "StatusSimpleHealth": true,
- "StatusOUVerify": false,
- "AgentPollMinutes": 60,
- "UnseenAgentForgetHours": 6,
- "StaleSeedFailMinutes": 60,
- "SeedAcceptableBytesDiff": 8192,
- "PseudoGTIDPattern": "",
- "PseudoGTIDPatternIsFixedSubstring": false,
- "PseudoGTIDMonotonicHint": "asc:",
- "DetectPseudoGTIDQuery": "",
- "BinlogEventsChunkSize": 10000,
- "SkipBinlogEventsContaining": [],
- "ReduceReplicationAnalysisCount": true,
- "FailureDetectionPeriodBlockMinutes": 60,
- "FailMasterPromotionOnLagMinutes": 0,
- "RecoveryPeriodBlockSeconds": 3600,
- "RecoveryIgnoreHostnameFilters": [],
- "RecoverMasterClusterFilters": ["*"], #需要更改默认配置,改为*,不然主从故障自动切换不会生效
- "RecoverIntermediateMasterClusterFilters": ["*"], #需要更改默认配置,改为*,不然主从故障自动切换不会生效
- "OnFailureDetectionProcesses": [
- "echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countSlaves}' >> /tmp/recovery.log"
- ],
- "PreGracefulTakeoverProcesses": [
- "echo 'Planned takeover about to take place on {failureCluster}. Master will switch to read_only' >> /tmp/recovery.log"
- ],
- "PreFailoverProcesses": [
- "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/recovery.log"
- ],
- "PostFailoverProcesses": [
- "echo '(for all types) Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
- ],
- "PostUnsuccessfulFailoverProcesses": [],
- "PostMasterFailoverProcesses": [
- "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Promoted: {successorHost}:{successorPort}' >> /tmp/recovery.log"
- ],
- "PostIntermediateMasterFailoverProcesses": [
- "echo 'Recovered from {failureType} on {failureCluster}. Failed: {failedHost}:{failedPort}; Successor: {successorHost}:{successorPort}' >> /tmp/recovery.log"
- ],
- "PostGracefulTakeoverProcesses": [
- "echo 'Planned takeover complete' >> /tmp/recovery.log"
- ],
- "CoMasterRecoveryMustPromoteOtherCoMaster": true,
- "DetachLostSlavesAfterMasterFailover": true,
- "ApplyMySQLPromotionAfterMasterFailover": true,
- "PreventCrossDataCenterMasterFailover": false,
- "PreventCrossRegionMasterFailover": false,
- "MasterFailoverDetachReplicaMasterHost": false,
- "MasterFailoverLostInstancesDowntimeMinutes": 0,
- "PostponeReplicaRecoveryOnLagMinutes": 0,
- "OSCIgnoreHostnameFilters": [],
- "GraphiteAddr": "",
- "GraphitePath": "",
- "GraphiteConvertHostnameDotsToUnderscores": true,
- "ConsulAddress": "",
- "ConsulAclToken": "",
- "ConsulKVStoreProvider": "consul"
- }
启动orchestrator组件服务
nohup ./orchestrator http &>./orchestrator.log&
查看orchestrator组件服务是否正常
netstat -tlpn |grep 3001
访问 orchestrator组件web
接下来添加mysql主从,只需添加主从中的任意一台就会自动识别整个主从架构
关于主从故障的测试就不再演示了,可以自行操作。需要注意的是,原来挂掉的主数据库重新启动后,需要手动将其配置为slave,才能重新加入主从架构中