• 数据库:Hive转Presto(二)


    继续上节代码,补充了replace_func函数,

    1. import re
    2. import os
    3. from tkinter import *
    4. class Hive2Presto:
    5. def __int__(self):
    6. self.t_funcs = ['substr', 'nvl', 'substring', 'unix_timestamp'] + \
    7. ['to_date', 'concat', 'sum', 'avg', 'abs', 'year', 'month', 'ceiling', 'floor']
    8. self.time_funcs = ['date_add', 'datediff', 'add_months']
    9. self.funcs = self.t_funcs + self.time_funcs
    10. self.current_path = os.path.abspath(__file__)
    11. self.dir = os.path.dirname(self.current_path)
    12. self.result = []
    13. self.error = []
    14. self.filename = ''
    15. def main(self):
    16. self.root = Tk()
    17. self.root.config(bg='#ff741d') # 背景颜色设置为公司主题色^_^
    18. self.root.title('Hive转Presto')
    19. self.win_width = 550
    20. self.win_height = 500
    21. self.screen_width = self.root.winfo_screenwidth()
    22. self.screen_height = self.root.winfo_screenheight()
    23. self.x = (self.screen_width - self.win_width) // 2
    24. self.y = (self.screen_height - self.win_height) // 2
    25. self.root.geometry(f'{self.win_width}x{self.win_height}+{self.x}+{self.y}')
    26. font = ('楷体', 11)
    27. self.button = Button(self.root, text='转换', command=self.trans, bg='#ffcc8c', font=font, anchor='e')
    28. self.button.grid(row=0, column=0, padx=100, pady=10, sticky=W)
    29. self.file_button = Button(self.root, text='选择文件', command=self.choose_file, bg='#ffcc8c', font=font,
    30. anchor='e')
    31. self.file_button.grid(row=0, column=1, padx=0, pady=10, sticky=W)
    32. self.entry = Entry(self.root, width=65, font=font)
    33. self.entry.insert(0, '输入Hive代码')
    34. self.entry.grid(row=1, column=0, padx=10, pady=10, columnspan=2)
    35. self.entry.bind('', self.delete_text)
    36. self.text = Text(self.root, width=75, height=20)
    37. self.text.grid(row=2, column=0, padx=10, pady=10, columnspan=2)
    38. self.des_label = Label(self.root, text='可以复制结果,也有生成的文件,与选取的文件同文件夹', bg='#ffcc8c',
    39. font=('楷体', 10))
    40. self.des_label.grid(row=3, column=0, padx=10, pady=10, columnspan=2)
    41. s = ''
    42. for i in range(0, (n := len(self.funcs)), 4):
    43. if i + 4 <= n:
    44. s += ','.join(self.funcs[i:i + 4]) + '\n'
    45. else:
    46. s += ','.join(self.funcs[i:]) + '\n'
    47. s = s[:-1]
    48. self.des_label1 = Label(self.root, text=s, bg='#ffcc8c',
    49. font=('楷体', 10))
    50. self.des_label1.grid(row=4, column=0, padx=10, pady=10, columnspan=2)
    51. self.root.columnconfigure(0, minsize=10)
    52. self.root.columnconfigure(1, minsize=10)
    53. self.root.columnconfigure(0, pad=5)
    54. self.root.mainloop()
    55. def replace_func(self, s, res):
    56. """
    57. 把搜索到函数整体取出来,处理括号中的参数
    58. :param s:
    59. :param res:
    60. :return:
    61. """
    62. for f in res:
    63. f1 = f.replace('\n', '').strip()
    64. f1 = re.sub(r'(\s*)', '(', f1)
    65. # 搜索括号里的字符串
    66. if re.findall(r'(\w*)\(', f1):
    67. func_name = re.findall(r'(\w*)\(', f1)[0].strip()
    68. else:
    69. continue
    70. try:
    71. if 'date_add' == func_name.lower():
    72. date, date_num = self.extact_func(f1, func_name)
    73. s_n = f"date_add('day',{date_num},cast(substr(cast{date} as varchar,1,10) as date))"
    74. s = s.replace(f, s_n)
    75. elif 'datediff' == func_name.lower():
    76. date1, date2 = self.extact_func(f1, func_name)
    77. s_n = f"date_add('day',{date2},cast(substr(cast{date} as varchar,1,10) as date),cast(substr(cast{date1} as varchar),1,10) as date))"
    78. s = s.replace(f, s_n)
    79. elif 'nvl' == func_name.lower():
    80. s1, s2 = self.extact_func(f1, func_name)
    81. s_n = f"coalesce({s1},{s2})"
    82. s = s.replace(f, s_n)
    83. elif 'substr' == func_name.lower():
    84. date, start, end = self.extact_func(f1, func_name)
    85. s_n = f"substr(cast({date} as varchar),{start},{end}"
    86. s = s.replace(f, s_n)
    87. elif 'substring' == func_name.lower():
    88. date, start, end = self.extact_func(f1, func_name)
    89. s_n = f"substring(cast({date} as varchar),{start},{end}"
    90. s = s.replace(f, s_n)
    91. elif 'unit_timestamp' == func_name.lower():
    92. date = self.extact_func(f1, func_name)[0]
    93. s_n = f"to_unixtime(cast({date} as timestanp))"
    94. s = s.replace(f, s_n)
    95. elif 'to_date' == func_name.lower():
    96. date = self.extact_func(f1, func_name)[0]
    97. s_n = f"cast({date} as date)"
    98. s = s.replace(f, s_n)
    99. elif 'concat' == func_name.lower():
    100. res = self.extact_func(f1, func_name)[0]
    101. s_n = f'concat('
    102. for r in res:
    103. r = r.strip().replace('\n', '')
    104. s_n += f"cast({r} as varchar),"
    105. s_n = s_n[:-1] + ')'
    106. s = s.replace(f, s_n)
    107. elif 'sum' == func_name.lower():
    108. if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
    109. continue
    110. ss = self.extact_func(f1, func_name)[0]
    111. if 'if(' in ss.replace(' ', ''):
    112. continue
    113. s = self.func_trans(f, f1, func_name, ss, s)
    114. elif 'avg' == func_name.lower():
    115. if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
    116. continue
    117. ss = self.extact_func(f1, func_name)[0]
    118. if 'if(' in ss.replace(' ', ''):
    119. continue
    120. s = self.func_trans(f, f1, func_name, ss, s)
    121. elif 'abs' == func_name.lower():
    122. if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
    123. continue
    124. ss = self.extact_func(f1, func_name)[0]
    125. if 'if(' in ss.replace(' ', ''):
    126. continue
    127. s = self.func_trans(f, f1, func_name, ss, s)
    128. elif 'ceiling' == func_name.lower():
    129. if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
    130. continue
    131. ss = self.extact_func(f1, func_name)[0]
    132. if 'if(' in ss.replace(' ', ''):
    133. continue
    134. s = self.func_trans(f, f1, func_name, ss, s)
    135. elif 'floor' == func_name.lower():
    136. if 'unix_timestamp' in f1 or 'to_unixtime' in f1:
    137. continue
    138. ss = self.extact_func(f1, func_name)[0]
    139. if 'if(' in ss.replace(' ', ''):
    140. continue
    141. s = self.func_trans(f, f1, func_name, ss, s)
    142. elif 'year' == func_name.lower():
    143. date = self.extact_func(f1, func_name)[0]
    144. s_n = f"year(cast(substr(cast({date} as varchar,1,10) as date))"
    145. s = s.replace(f, s_n)
    146. elif 'month' == func_name.lower():
    147. date = self.extact_func(f1, func_name)[0]
    148. s_n = f"month(cast(substr(cast({date} as varchar,1,10) as date))"
    149. s = s.replace(f, s_n)
    150. except:
    151. self.error.append(f"源代码中{func_name}函数参数输入可能有错误,具体为:{f1}")
    152. continue
    153. if self.error:
    154. self.entry.delete(0,END)
    155. self.text.delete("1.0",END)
    156. self.text.insert("end",f"{s}")
    157. self.error.insert(0,'转换失败,有部分没有转成功\n')
    158. root_ex = Tk()
    159. root_ex.title('错误')
    160. win_width = 600
    161. win_height = 200
    162. screen_width = root_ex.winfo_screenwidth()
    163. screen_height = root_ex.winfo_screenheight()
    164. x = (screen_width - win_width) // 2
    165. y = (screen_height - win_height) // 2
    166. root_ex.geometry(f'{win_width}x{win_height}+{x}+{y}')
    167. label_ex = Label(root_ex,text="\n".join(self.error),font=("楷体",10))
    168. label_ex.pack()
    169. root_ex.mainloop()
    170. return s
    171. def func_trans(self, f, f1, func_name, ss, s):
    172. pass
    173. def choose_file(self):
    174. """
    175. 如果代码太多,从text中输入会很卡,直接选择代码文件输入会很快
    176. :return:
    177. """
    178. pass
    179. def findvar(self, ss):
    180. """
    181. 搜索与计算有关的字段
    182. :param ss:
    183. :return:
    184. """
    185. pass
    186. def mysplit(self, s):
    187. """
    188. 分割字段
    189. :param s:
    190. :return:
    191. """
    192. pass
    193. def extact_func(self, s, func_name):
    194. pass
    195. def delete_text(self, event):
    196. pass
    197. def trans(self):
    198. pass
    199. if __name__ == '__main__':
    200. pro = Hive2Presto()
    201. pro.__int__()
    202. pro.main()

     

     

     

     

  • 相关阅读:
    单例设计模式
    语音识别数据的采集方法:基本流程&数据类型
    刷新页面,数据丢失
    Java EE——JVM基础知识
    MySQL——子查询和嵌套查询
    LocalDate、LocalTime、LocalDateTime常用方法
    高校云原生安全该如何构建?
    济南 章丘 科目三 资料 收集
    【Pytorch学习笔记】9.分类器的分类结果如何评估——使用混淆矩阵、F1-score、ROC曲线、PR曲线等(以Softmax二分类为例)
    【mid】sdp解析:奇怪的vector 引用
  • 原文地址:https://blog.csdn.net/Sanfenpai6/article/details/133387662