• 一道 python 数据分析的题目


    python 数据分析的题目。

    做题方法:使用 pandas 读取数据,然后分析。

    知识点:pandas、正则表达式、Python 知识。

    过程:不断向 GPT 提问;遇到有问题的地方先自己分析,再把分析的结果告诉 GPT。注意要使用 Data Analysis 功能,另外用中文提问效果不是很好。

    在这里插入图片描述

    import pandas as pd
    import hashlib
    import re
    from datetime import datetime
    
    def load_csv(file_path):
        """Read the CSV at ``file_path`` and return it as a pandas DataFrame.

        Thin wrapper around ``pd.read_csv`` called with its default options.
        """
        frame = pd.read_csv(file_path)
        return frame
    
    def preprocess_a_table(a_table):
        """Group the tables listed in ``a_table`` by account.

        Args:
            a_table: DataFrame with at least the columns '账号' and '操作表'.

        Returns:
            A dict mapping each '账号' value to the set of '操作表' values
            that appear on the same rows.
        """
        tables_by_account = {}
        for _, record in a_table.iterrows():
            # setdefault creates the empty set the first time an account is seen.
            tables_by_account.setdefault(record['账号'], set()).add(record['操作表'])
        return tables_by_account
    
    # `row` is a single record from table B.
    def is_valid_operation(row, a_table, account_table_map):
        """Check one table-B operation against the table-A rules.

        Args:
            row: Mapping-like record with the keys '账号', '执行操作',
                '操作时间' and '编号'.
            a_table: DataFrame of rules with the columns '编号', '账号',
                '操作表', '权限' and '可操作时间段(时:分:秒)'.
            account_table_map: Dict mapping each account to the set of tables
                listed for it.

        Returns:
            ``'0_<row id>'`` when the account is unknown or the extracted table
            is not in the account's set; ``'<rule id>_<row id>'`` when a
            matching rule's time window or permission list is violated;
            ``None`` otherwise.
        """
        account = row['账号']
        statement = row['执行操作'].lower()
        row_id = str(row['编号'])

        # The target table is the word after the first of FROM / INTO / UPDATE
        # that occurs in the statement (tried in that order).
        table_name = None
        for pattern in (r'from (\w+)', r'into (\w+)', r'update (\w+)'):
            found = re.search(pattern, statement)
            if found:
                table_name = found.group(1)
                break

        # The operation type is the leading word of the statement.
        leading_word = re.match(r'(\w+)', statement)
        operation_type = leading_word.group(1) if leading_word else None

        # Unknown account, or a table outside the account's listed tables.
        if account not in account_table_map:
            return '0_' + row_id
        if table_name and table_name not in account_table_map[account]:
            return '0_' + row_id

        # Inspect every rule of this account that targets the same table.
        account_rules = a_table[a_table['账号'] == account]
        for _, rule in account_rules.iterrows():
            if rule['操作表'] != table_name:
                continue

            violation_id = '_'.join([str(rule['编号']), row_id])

            # Time check: the operation must fall inside the inclusive window.
            performed_at = datetime.strptime(row['操作时间'], '%Y/%m/%d %H:%M:%S').time()
            window_open, window_close = rule['可操作时间段(时:分:秒)'].split('~')
            window_open = datetime.strptime(window_open, '%H:%M:%S').time()
            window_close = datetime.strptime(window_close, '%H:%M:%S').time()
            if performed_at < window_open or performed_at > window_close:
                return violation_id

            # Permission check: the leading keyword must be listed for the rule.
            granted = rule['权限'].split(',')
            if operation_type and operation_type not in granted:
                return violation_id

        return None
    
    def custom_sort(violation):
        """Return a numeric sort key for a violation string such as ``'3_17'``.

        The string is split on '_' and its first two pieces are converted to
        integers, giving a ``(first, second)`` tuple.
        """
        pieces = violation.split('_')
        first_id = int(pieces[0])
        second_id = int(pieces[1])
        return first_id, second_id
    
    def main():
        """Check every table-B operation against table A and print the outcome.

        Loads both CSV files, collects the violation strings returned by
        ``is_valid_operation``, orders them with ``custom_sort``, joins them
        with commas, and prints the joined string along with its MD5 hex digest.
        """
        a_table = load_csv('../iris_data/a_table_test.csv')
        b_table = load_csv('../iris_data/b_table_test.csv')

        account_table_map = preprocess_a_table(a_table)

        # Evaluate each logged operation; falsy results (no violation) are dropped.
        verdicts = (
            is_valid_operation(record, a_table, account_table_map)
            for _, record in b_table.iterrows()
        )
        violations = [verdict for verdict in verdicts if verdict]

        # Order numerically by the id before the underscore, then the id after it.
        violations.sort(key=custom_sort)

        # Serialise to a single comma-separated string and hash it.
        violations_joined = ','.join(violations)
        md5_hash = hashlib.md5(violations_joined.encode()).hexdigest()

        print(f"Sorted Violations: {violations_joined}")
        print(f"MD5 Hash: {md5_hash}")
    
    
    # Run the analysis only when this file is executed as a script, not on import.
    if __name__ == "__main__":
        main()
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
  • 相关阅读:
    分布式事务详解
    基于three.js实现的点击盒子消除游戏
    学生会学习部部长竞选稿
    基于WPSOffice+Pywpsrpc构建Docker镜像,实现文档转换和在线预览服务
    测试人生 | 从传统行业到名企大厂,薪资翻倍,我做到了
    软件成分分析:华为云重磅发布开源软件治理服务
    VUE 项目 nginx部署
    SeaTunnel 入门到精通(一)
    【HTML】猜拳小游戏
    手把手APP抓包检测实战 - 某汽车APP
  • 原文地址:https://blog.csdn.net/hacker__man/article/details/134418995