clickhouse各节点是独立平等的, 所谓的集群配置不过是在各节点配置其他节点的用户名密码以及分片和副本的信息。建表时指定表的分片和副本情况,所以有人说clickhouse集群是表级别的。
下载安装包
https://repo.yandex.ru/clickhouse/deb/stable/main/
我使用的是https://download.csdn.net/download/shy_snow/86830036
deb文件安装,安装过程中会要求设置密码
apt-get clickhouse*.deb
配置组网metrika.xml
<yandex>
<clickhouse_remote_servers>
<cluster_3S_2R>
<shard>
<internal_replication>trueinternal_replication>
<replica>
<host>10.167.85.24host>
<port>9000port>
<password>123456password>
replica>
<replica>
<host>10.167.85.25host>
<port>9988port>
<password>123456password>
replica>
shard>
<shard>
<internal_replication>trueinternal_replication>
<replica>
<host>10.167.85.25host>
<port>9000port>
<password>123456password>
replica>
<replica>
<host>10.167.85.26host>
<port>9988port>
<password>123456password>
replica>
shard>
<shard>
<internal_replication>trueinternal_replication>
<replica>
<host>10.167.85.26host>
<port>9000port>
<password>123456password>
replica>
<replica>
<host>10.167.85.24host>
<port>9988port>
<password>123456password>
replica>
shard>
cluster_3S_2R>
clickhouse_remote_servers>
<zookeeper-servers>
<node>
<host>10.167.85.24host>
<port>2181port>
node>
<node>
<host>10.167.85.25host>
<port>2181port>
node>
<node>
<host>10.167.85.26host>
<port>2181port>
node>
zookeeper-servers>
<networks>
<ip>::/0ip>
networks>
<macros>
<shard>02shard>
<replica>10.167.85.25replica>
macros>
<clickhouse_compression>
<case>
<min_part_size>10000000000min_part_size>
<min_part_size_ratio>0.01min_part_size_ratio>
<method>lz4method>
case>
clickhouse_compression>
yandex>
<include_from>/etc/clickhouse-server/metrika.xmlinclude_from>
<remote_servers incl="clickhouse_remote_servers" >remote_servers>
<zookeeper incl="zookeeper-servers" optional="true" />
<macros incl="macros" optional="true" />
<users>
<newusersname>
<password>123456password>
<networks incl="networks" replace="replace">
<ip>::/0ip>
networks>
<profile>defaultprofile>
<quota>defaultquota>
<access_management>1access_management>
newusersname>
<default>
<password>123password>
<networks incl="networks" replace="replace">
<ip>::/0ip>
networks>
<profile>defaultprofile>
<quota>defaultquota>
default>
users>
6.复制实例并启动
只有3台机器只能在每台机器上启动两个实例,6个实例组成3分片2副本集群。 每台机器除了9000再启动一个9988端口实例
cp /etc/systemd/system/clickhouse-server.service /etc/systemd/system/clickhouse-server9988.service
sudo vim /etc/systemd/system/clickhouse-server9988.service
# 修改一行即可
# ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
改为
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config9988.xml --pid-file=/run/clickhouse-server1/clickhouse-server9988.pid
# 注意clickhouse-server9988.service的权限 chmod 764 /etc/systemd/system/clickhouse-server9988.service
复制config.xml为config9988.xml,并修改端口和路径
```shell
#路径修改
sed -i 's#/var/lib/clickhouse#/app/clickhouse#' config.xml
#端口tcp_port、http_port、mysql_port手工修改
#启动 如果失败把clickhouse-server9988.service中的语句复制出来执行看具体报错,基本都是文件权限不足
service clickhouse-server start
service clickhouse-server9988 start
#查看异常日志
journalctl -u clickhouse-server
journalctl -u clickhouse-server9988
#查看状态
service clickhouse-serverstatus
service clickhouse-server9988 status
#添加开机启动
systemctl enable clickhouse-server
systemctl enable clickhouse-server9988
7.分布式表测试
#连上一个节点之后测试执行建分布式表语句
clickhouse-client --password 123
clickhouse-client -u default --host localhost --port 9000 --password 123
-- 查询cluster集群名称 比如cluster_3S_2R
--
select * from system.clusters;
SELECT * FROM system.macros;
drop database if exists test on cluster cluster_3S_2R ;
create database test on cluster cluster_3S_2R ;
-- 建本地表(on cluster 会在集群的各个节点上建表, 但是insert数据只会在当前节点)
drop table if exists test.cmtest on cluster cluster_3S_2R;
-- 复制表
CREATE TABLE test.cmtest on cluster cluster_3S_2R (`id` String COMMENT 'id', `nginxTime` DateTime COMMENT 'nginxTime' ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cmtest','{replica}') partition by toYYYYMMDD(nginxTime) primary key (id) ORDER BY (id);
--
drop table if exists test.cmtest_dist on cluster cluster_3S_2R;
-- 分布式表,类似视图,不保存数据,但是查询时会合并各本地表的数据,插入时会根据策略均衡到各节点;
create TABLE test.cmtest_dist on cluster cluster_3S_2R as test.cmtest ENGINE = Distributed("cluster_3S_2R", "test", "cmtest", rand());
-- 对本地insert只插入到本地
insert into test.cmtest values ('29-1',now());
-- 对分布式表插入会根据规则路由到某个节点
insert into test.cmtest_dist values ('1004000',now()+3600*24);
select * from test.cmtest;
# 分布式表会使用metrika.xml配置的用户名密码去分别访问各分片数据,如果正常应该查到全量数据
select * from test.cmtest_dist;
-- 删除分布式表不会删除数据,重新创建分布式表后仍然可以查询到全量数据
drop table if exists test.cmtest_dist on cluster cluster_3S_2R;
-- 删除本地表会删数据
drop table if exists test.cmtest on cluster cluster_3S_2R;
drop database if exists test on cluster cluster_3S_2R ;
参考官网文档 https://clickhouse.com/docs/zh/engines/table-engines/mergetree-family/replication/