• [星期维度]日志数据提取事件关键词,解析对应日期的星期计数,matplotlib绘制统计图,python


    [星期维度]日志数据提取事件关键词,解析对应日期的星期计数,matplotlib绘制统计图,python

    这次把日志数据中每一行包含关键词的日期对应的星期计数,绘制统计图表

    参考文:

    根据星期时间统计日期总量,绘制图表,pandas,matplotlib,Python

    https://zhangphil.blog.csdn.net/article/details/125934069

    日志数据提取事件关键词,解析对应时间点计数,matplotlib绘制统计图,python
    https://zhangphil.blog.csdn.net/article/details/125923359

    import re
    from datetime import datetime
    from pprint import pp

    import matplotlib
    import matplotlib.font_manager
    import matplotlib.pyplot as plt
    import pandas as pd
    from fuzzywuzzy import fuzz
    # Path of the log file to analyse (placeholder; replace with a real path).
    FILE_PATH = r'源数据路径'
    # Keyword text used for fuzzy matching against each log line
    # (placeholder; original note: "keyword 1, keyword 2").
    KEY = r'模糊匹配的关键词'
    # Minimum fuzzy-match score a line must reach to be counted.
    threshold = 80
    # Dictionary keys used for the per-weekday counting records.
    SECTION = 'section'
    SUM = 'sum'
    def drawchart(df, font_path=r'C:\Windows\Fonts\msyh.ttc'):
        """Draw a horizontal bar chart of per-weekday counts and show it.

        Args:
            df: DataFrame whose first column holds the bar labels and whose
                second column holds the numeric bar lengths. The index values
                are drawn inside each bar.
            font_path: Path of the font file used for the text on the chart.
                The default is the same Windows font path the script has
                always used; pass another path on other systems.
        """
        # A raw string is used for the default path so the backslashes are not
        # read as (invalid) escape sequences.
        myfont = matplotlib.font_manager.FontProperties(fname=font_path)
        plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
        plt.rc('font', family='YaHei', weight='bold')

        order = list(df.index)
        name = [row[0] for row in df.values]
        mem = [int(row[1]) for row in df.values]

        font_size = 12
        _, ax = plt.subplots(figsize=(15, 13))
        bars = ax.barh(y=range(len(name)), width=mem, align='center', color='red')

        # Annotate every bar: its value just right of the bar end, and its
        # index value centred inside the bar.
        for rank, rect in zip(order, bars):
            w = rect.get_width()
            y_mid = rect.get_y() + rect.get_height() / 2
            ax.text(x=w, y=y_mid, s='%d' % (int(w)),
                    horizontalalignment='left', verticalalignment='center',
                    fontproperties=myfont, fontsize=font_size - 2, color='green')
            ax.text(x=w / 2, y=y_mid, s=str(rank),
                    horizontalalignment='center', verticalalignment='center',
                    fontproperties=myfont, fontsize=font_size - 3, color='white')

        ax.set_yticks(range(len(name)))
        ax.set_yticklabels(name, fontsize=font_size - 1, fontproperties=myfont)
        ax.invert_yaxis()  # first row of df at the top

        ax.set_xlabel('数据', fontsize=font_size + 2, fontproperties=myfont)
        ax.set_title('不同星期日数据点总量排名', fontsize=font_size + 3, fontproperties=myfont)

        # Hide the tick labels on the x axis.
        plt.xticks(())

        # Keep the y axis visible but remove the frame around the plot.
        ax.get_yaxis().set_visible(True)
        for spine in ["left", "top", "right", "bottom"]:
            ax.spines[spine].set_visible(False)

        plt.subplots_adjust(left=0.15)  # widen the left margin for the labels
        ax.set_aspect('auto')

        plt.show()
    def read_file():
        """Scan the log file and collect the dates of lines matching the keyword.

        Each line of FILE_PATH is compared with KEY using
        fuzz.partial_ratio. For every line scoring at least `threshold`, a
        timestamp of the form "YYYY年M月D日H时M分" is searched for in the
        line; when it is found and parses, its date is collected. Progress
        and diagnostics are printed for every matching line.

        Returns:
            list of datetime.date: one entry per matching line whose
            timestamp was found and parsed, in file order.
        """
        # Compiled once, outside the loop. Matches e.g. 2022年7月22日8时5分.
        time_pattern = re.compile(r'\d{4}年\d{1,2}月\d{1,2}日\d{1,2}时\d{1,2}分')

        all_case_time = []
        case_count = 1

        # The context manager closes the file even if processing raises.
        with open(FILE_PATH, 'r', encoding='UTF-8') as file:
            for cnt, line in enumerate(file, start=1):
                pr = fuzz.partial_ratio(line, KEY)
                if pr < threshold:
                    continue

                print('-----')
                print(f'第{case_count}件')
                case_count += 1

                mat = time_pattern.search(line)
                if mat is None:
                    # No timestamp in this line; report a marker instead.
                    t_str = '-解析异常-'
                else:
                    t_str = mat.group()
                    try:
                        object_t = datetime.strptime(t_str, "%Y年%m月%d日%H时%M分")
                    except ValueError:
                        # Pattern matched but is not a valid date/time
                        # (e.g. month 13); skip it, keep t_str for the report.
                        print('解析日期失败')
                    else:
                        all_case_time.append(object_t.date())
                        print(f'{object_t.date().strftime("%Y-%m-%d")} {object_t.weekday()}')

                s = '第{number}行,相似度{ratio},时间{case_time}\n{content}'
                pp(s.format(number=cnt, ratio=pr, case_time=t_str, content=line))

        return all_case_time
    def data_frame():
        """Count the dates returned by read_file() per day of the week.

        Returns:
            list of dict: seven records, one per weekday number 0..6 (as
            given by date.weekday(), so 0 is Monday), each of the form
            {SECTION: weekday_number, SUM: count}, ordered by weekday number.
        """
        counts = [0] * 7
        for t in read_file():
            # weekday() is always in 0..6, so it indexes the list directly.
            counts[t.weekday()] += 1
        return [{SECTION: day, SUM: total} for day, total in enumerate(counts)]
    def number_to_weekday(number):
        """Return the Chinese weekday name for a weekday number.

        Args:
            number: Weekday number from 0 to 6, where 0 maps to '星期一'
                and 6 maps to '星期日'.

        Returns:
            str: The weekday name, e.g. '星期三' for 2.

        Raises:
            IndexError: If number is outside 0..6. Negative values are
                rejected explicitly so they cannot silently wrap around
                to a name counted from the end of the week.
        """
        zh = ['一', '二', '三', '四', '五', '六', '日']
        if not 0 <= number < len(zh):
            raise IndexError(f'weekday number out of range: {number}')
        return f'星期{zh[number]}'
    if __name__ == '__main__':
        # Script entry point: count keyword matches per weekday, rank the
        # weekdays by count, print the ranking and draw it.
        times = data_frame()

        # Assemble the per-weekday records into a pandas DataFrame.
        col = ['星期', '次数']
        pd_data = [[number_to_weekday(t[SECTION]), t[SUM]] for t in times]
        df = pd.DataFrame(data=pd_data, columns=col)
        df = df.sort_values(by=col[1], axis=0, ascending=False)  # descending by count

        # Re-index from 1 so the index doubles as the rank.
        df = df.reset_index(drop=True)
        df.index = df.index + 1

        # Print up to 20 rows; there are only seven weekdays, so this shows
        # the whole ranking.
        pp(df.head(20))

        drawchart(df)

    变换不同关键词,得出的统计图:

  • 相关阅读:
    黑猫带你学Makefile第11篇:当头文件a.h改变时,如何将所有依赖头文件a.h的.c文件都重新编译
    TinyPNG和Optimize.js区别?
    Scala语言入门
    二分类问题中的性能指标【python,机器学习,算法】
    4核8G服务器价格选择轻量还是CVM合适?
    SQL查询
    Amazon MSK 基于 S3 的数据导出、导入、备份、还原、迁移方案
    Python自动化测试框架有哪些?
    挑战Typescript项目中的strict编译模式
    对接京东平台的第一篇
  • 原文地址:https://blog.csdn.net/zhangphil/article/details/125941649