• [星期维度]日志数据提取事件关键词,解析对应日期的星期计数,matplotlib绘制统计图,python


    [星期维度]日志数据提取事件关键词,解析对应日期的星期计数,matplotlib绘制统计图,python

    这次从日志数据中筛选出包含关键词的每一行,提取该行中的日期,按日期对应的星期进行计数,并绘制统计图表。

    参考文章:

    根据星期时间统计日期总量,绘制图表,pandas,matplotlib,Python

    https://zhangphil.blog.csdn.net/article/details/125934069

    日志数据提取事件关键词,解析对应时间点计数,matplotlib绘制统计图,python
    https://zhangphil.blog.csdn.net/article/details/125923359

    1. from datetime import datetime
    2. from pprint import pp
    3. import pandas as pd
    4. import matplotlib
    5. import matplotlib.pyplot as plt
    6. from fuzzywuzzy import fuzz
    7. import re
    # Script configuration.
    FILE_PATH = r'源数据路径'  # placeholder: path of the source log file
    KEY = r'模糊匹配的关键词'  # placeholder: keyword(s) to fuzzy-match, e.g. "keyword1,keyword2"
    threshold = 80  # minimum fuzz.partial_ratio score for a line to count as a match

    # Dictionary keys used for the per-weekday counters.
    SECTION = 'section'
    SUM = 'sum'
    def drawchart(df):
        """Draw a horizontal bar chart of the counts in ``df`` and show it.

        Args:
            df: pandas DataFrame whose first column holds the bar labels and
                whose second column holds the counts (converted with ``int``).
                Each row's index value is printed in the middle of its bar.
        """
        # Imported explicitly: ``font_manager`` is a submodule of matplotlib and
        # is only reachable as an attribute once something has imported it.
        from matplotlib.font_manager import FontProperties

        # Raw string: the Windows path contains backslashes that would otherwise
        # be parsed as (invalid) escape sequences.
        myfont = FontProperties(fname=r'C:\Windows\Fonts\msyh.ttc')
        plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
        plt.rc('font', family='YaHei', weight='bold')

        order = list(df.index)
        name = [row[0] for row in df.values]
        mem = [int(row[1]) for row in df.values]

        FONT_SIZE = 12
        fig, ax = plt.subplots(figsize=(15, 13))
        bars = ax.barh(y=range(len(name)), width=mem, align='center', color='red')

        # Label every bar: the count to the right of the bar, the row's index
        # value centred inside it.
        for rank, rect in zip(order, bars):
            w = rect.get_width()
            mid_y = rect.get_y() + rect.get_height() / 2
            ax.text(x=w, y=mid_y, s='%d' % (int(w)),
                    horizontalalignment='left', verticalalignment='center',
                    fontproperties=myfont, fontsize=FONT_SIZE - 2, color='green')
            ax.text(x=w / 2, y=mid_y, s=str(rank),
                    horizontalalignment='center', verticalalignment='center',
                    fontproperties=myfont, fontsize=FONT_SIZE - 3, color='white')

        ax.set_yticks(range(len(name)))
        ax.set_yticklabels(name, fontsize=FONT_SIZE - 1, fontproperties=myfont)
        ax.invert_yaxis()  # first row of df is drawn at the top

        ax.set_xlabel('数据', fontsize=FONT_SIZE + 2, fontproperties=myfont)
        ax.set_title('不同星期日数据点总量排名', fontsize=FONT_SIZE + 3, fontproperties=myfont)

        # No tick labels on the x axis.
        plt.xticks(())

        # Remove the border lines around the plot but keep the y axis visible.
        ax.get_yaxis().set_visible(True)
        for spine in ["left", "top", "right", "bottom"]:
            ax.spines[spine].set_visible(False)

        plt.subplots_adjust(left=0.15)  # widen the left margin
        ax.set_aspect('auto')

        plt.show()
    def read_file():
        """Collect the dates of all log lines that fuzzy-match ``KEY``.

        Every line of ``FILE_PATH`` is scored against ``KEY`` with
        ``fuzz.partial_ratio``; a line counts as a match when the score is at
        least ``threshold``. For each match, the first timestamp that fits the
        ``%Y年%m月%d日%H时%M分`` format is parsed and its date is collected.
        A summary of every matching line is printed along the way.

        Returns:
            A list of ``datetime.date`` objects, one per matching line whose
            timestamp was found and parsed, in file order.
        """
        # Compiled once instead of once per line.
        time_pattern = re.compile(r'\d{4}\年\d{1,2}\月\d{1,2}\日\d{1,2}\时\d{1,2}\分')

        all_case_time = []
        case_count = 1

        # ``with`` guarantees the file is closed even if processing raises.
        with open(FILE_PATH, 'r', encoding='UTF-8') as file:
            for cnt, line in enumerate(file, start=1):
                pr = fuzz.partial_ratio(line, KEY)
                if pr < threshold:
                    continue

                print('-----')
                print(f'第{case_count}件')
                case_count += 1

                mat = time_pattern.search(line)
                if mat is None:
                    # No timestamp on this line; keep a marker for the summary.
                    t_str = '-解析异常-'
                else:
                    t_str = mat.group()
                    try:
                        object_t = datetime.strptime(t_str, "%Y年%m月%d日%H时%M分")
                    except ValueError:
                        # Shaped like a timestamp but not a valid date/time.
                        print('解析日期失败')
                    else:
                        all_case_time.append(object_t.date())
                        print(f'{object_t.date().strftime("%Y-%m-%d")} {object_t.weekday()}')

                s = '第{number}行,相似度{ratio},时间{case_time}\n{content}'
                pp(s.format(number=cnt, ratio=pr, case_time=t_str, content=line))

        return all_case_time
    def data_frame():
        """Count how many of the dates returned by ``read_file`` fall on each weekday.

        Returns:
            A list of seven dicts of the form ``{SECTION: weekday, SUM: count}``,
            ordered by weekday number 0..6 as returned by ``date.weekday()``.
        """
        times = [{SECTION: day, SUM: 0} for day in range(7)]
        for t in read_file():
            # weekday() is always in 0..6 and entry i has SECTION == i, so the
            # weekday number indexes the list directly (no inner search needed).
            times[t.weekday()][SUM] += 1
        return times
    def number_to_weekday(number):
        """Return the Chinese weekday name for a weekday number.

        Args:
            number: index into the seven weekday numerals, 0 for the first
                ('星期一') through 6 for the last ('星期日').

        Returns:
            The string '星期' followed by the numeral selected by ``number``.

        Raises:
            IndexError: if ``number`` is outside the valid index range.
        """
        zh = ('一', '二', '三', '四', '五', '六', '日')
        return f'星期{zh[number]}'
    if __name__ == '__main__':
        times = data_frame()

        # Assemble the per-weekday counts into a pandas DataFrame.
        col = ['星期', '次数']
        pd_data = [[number_to_weekday(t[SECTION]), t[SUM]] for t in times]
        df = pd.DataFrame(data=pd_data, columns=col)
        df = df.sort_values(by=col[1], axis=0, ascending=False)  # descending by count

        # Reset the index so that it becomes a 1-based rank.
        df = df.reset_index(drop=True)
        df.index = df.index + 1

        # Print the ranking; there are only seven rows, so head(20) shows them all.
        pp(df.head(20))

        drawchart(df)

    变换不同关键词,得出的统计图:

  • 相关阅读:
    还在找PDF合并文件的方法?这就有3个实用方法
    GAN.py
    电脑游戏录屏哪个好用免费?这2款录屏软件,用过都说好!
    AQS理解
    Java8:Effectively final
    C语言常考面试基础问题
    LeetCode HOT 100 —— 75 .颜色分类
    Spring Cloud根据服务名获取服务的ip端口
    off-by-one (b00ks)
    java利用EasyExcel实现导入功能,并返回错误信息的所属行列
  • 原文地址:https://blog.csdn.net/zhangphil/article/details/125941649