• datax同步clickhouse数据到hive


    1.准备数据

    1.1 clickhouse建表并插入数据

    1. CREATE TABLE cell_towers_10
    2. (
    3.     radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5),
    4.     mcc UInt16,
    5.     net UInt16,
    6.     area UInt16,
    7.     cell UInt64,
    8.     unit Int16,
    9.     lon Float64,
    10.     lat Float64,
    11.     range UInt32,
    12.     samples UInt32,
    13.     changeable UInt8,
    14.     created DateTime,
    15.     updated DateTime,
    16.     averageSignal UInt8
    17. )
    18. ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
    1. INSERT INTO datasets.cell_towers_10 (radio,mcc,net,area,cell,unit,lon,lat,`range`,samples,changeable,created,updated,averageSignal) VALUES
    2.  ('CDMA',302,86,130,4113,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:44','2017-09-15 10:10:44',0),
    3.  ('CDMA',302,86,130,0,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:45','2017-09-15 10:10:45',0),
    4.  ('CDMA',302,86,130,4114,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:46','2017-09-15 10:10:46',0),
    5.  ('CDMA',302,1168,15002,59995,-1,-79.462952,44.009564,1000,7,1,'2017-09-14 19:22:48','2017-09-14 19:30:04',0),
    6.  ('CDMA',302,1168,15002,59506,-1,-79.522812,43.79319,1000,1,1,'2017-09-14 19:57:04','2017-09-14 20:33:33',0),
    7.  ('CDMA',302,1168,15004,60815,-1,-79.315284,43.838686,1000,7,1,'2017-09-14 20:22:45','2017-09-14 21:06:28',0),
    8.  ('CDMA',302,1168,15002,59507,-1,-79.459198,43.797741,1000,3,1,'2017-09-14 20:38:37','2017-09-14 21:20:47',0),
    9.  ('CDMA',302,1168,15002,59946,-1,-79.462547,44.01469,1000,1,1,'2017-09-14 22:19:45','2017-09-14 22:56:46',0),
    10.  ('CDMA',302,1168,16000,14113,-1,-80.480919,43.435841,1000,1,1,'2017-09-14 22:53:59','2017-09-15 00:22:24',0),
    11.  ('CDMA',302,1168,15004,60516,-1,-79.37619,43.84483,1000,2,1,'2017-09-14 23:11:07','2017-09-15 00:57:57',0);

    1.2 hive中建表

    1. CREATE TABLE ck_cell_towers_10
    2. (
    3. radio string,
    4. mcc smallint,
    5. net smallint,
    6. area int,
    7. cell bigint,
    8. unit smallint,
    9. lon double,
    10. lat double,
    11. range_a int,
    12. samples int,
    13. changeable tinyint,
    14. created date,
    15. updated date,
    16. averageSignal tinyint
    17. )row format delimited fields terminated by ",";

    2. 准备工作

    由于Datax没有clickhousereader组件,用rdbmsreader替代。

    需要把clickhousewriter/libs下的所有jar包复制到rdbmsreader/libs下,同名jar包直接替换,另外,删掉rm -f guava-r05.jar这个包,否则会报错。

    修改plugin.json文件:在"driver" 增加 "ru.yandex.clickhouse.ClickHouseDriver"。

    编辑json文件时,name改为rdbmsreader。

    "name": "rdbmsreader" 

    3. 创建任务

    可以在datax-web中创建任务生成json,也可以直接编辑json

    1. {
    2. "job": {
    3. "setting": {
    4. "speed": {
    5. "channel": 3
    6. },
    7. "errorLimit": {
    8. "record": 0,
    9. "percentage": 0.02
    10. }
    11. },
    12. "content": [
    13. {
    14. "reader": {
    15. "name": "rdbmsreader",
    16. "parameter": {
    17. "username": "yRjwDFuoPKlqya9h9H2Amg==",
    18. "password": "yRjwDFuoPKlqya9h9H2Amg==",
    19. "column": [
    20. "radio",
    21. "mcc",
    22. "net",
    23. "area",
    24. "cell",
    25. "unit",
    26. "lon",
    27. "lat",
    28. "range",
    29. "samples",
    30. "changeable",
    31. "created",
    32. "updated",
    33. "averageSignal"
    34. ],
    35. "splitPk": "",
    36. "connection": [
    37. {
    38. "table": [
    39. "cell_towers"
    40. ],
    41. "jdbcUrl": [
    42. "jdbc:clickhouse://10.16.60.44:8123/datasets"
    43. ]
    44. }
    45. ]
    46. }
    47. },
    48. "writer": {
    49. "name": "hdfswriter",
    50. "parameter": {
    51. "defaultFS": "hdfs://10.16.60.31:8020",
    52. "fileType": "text",
    53. "path": "/user/hive/warehouse/datasets.db/ck_cell_towers",
    54. "fileName": "ck_cell_towers",
    55. "writeMode": "append",
    56. "fieldDelimiter": ",",
    57. "column": [
    58. {
    59. "name": "radio",
    60. "type": "string"
    61. },
    62. {
    63. "name": "mcc",
    64. "type": "smallint"
    65. },
    66. {
    67. "name": "net",
    68. "type": "smallint"
    69. },
    70. {
    71. "name": "area",
    72. "type": "int"
    73. },
    74. {
    75. "name": "cell",
    76. "type": "bigint"
    77. },
    78. {
    79. "name": "unit",
    80. "type": "smallint"
    81. },
    82. {
    83. "name": "lon",
    84. "type": "double"
    85. },
    86. {
    87. "name": "lat",
    88. "type": "double"
    89. },
    90. {
    91. "name": "range_a",
    92. "type": "int"
    93. },
    94. {
    95. "name": "samples",
    96. "type": "int"
    97. },
    98. {
    99. "name": "changeable",
    100. "type": "tinyint"
    101. },
    102. {
    103. "name": "created",
    104. "type": "date"
    105. },
    106. {
    107. "name": "updated",
    108. "type": "date"
    109. },
    110. {
    111. "name": "averagesignal",
    112. "type": "tinyint"
    113. }
    114. ]
    115. }
    116. }
    117. }
    118. ]
    119. }
    120. }

    4. 执行结果

    fe6660d5abad4a41b059b54600e1cf84.png

  • 相关阅读:
    spring security(二)--授权
    5G 安全评估流程指南
    C复习-函数指针+字符串常量
    JAVA继承
    前端面试话术集锦第 15 篇:高频考点(React常考进阶知识点)
    130道基础OJ编程题之: 29 ~ 38 道
    Docker踩坑,又涨知识了
    [Doris]contens集群部署和基本使用(有时间继续补充内容)
    java中的泛型
    1019 General Palindromic Number
  • 原文地址:https://blog.csdn.net/shangjg03/article/details/132908304