• Apriori 算法的 Python 实现


    import numpy as np

    def load_data(file_path):
        """Read comma-separated transactions from a text file.

        Each non-blank line is one transaction whose items are separated by
        commas. Whitespace around every item is stripped. Empty items (for
        example from a trailing comma) and blank lines are skipped, so they
        neither appear as a bogus ``''`` item nor inflate the number of
        transactions used as the support denominator.

        Args:
            file_path: Path of the file to read. It is opened in text mode
                with the platform's default encoding.

        Returns:
            A list of transactions, each a list of non-empty item strings.
        """
        data = []
        with open(file_path, 'r') as f:
            # Iterate the file object directly; no need to load every line
            # into memory first with readlines().
            for line in f:
                items = [item.strip() for item in line.split(',')]
                items = [item for item in items if item]
                if items:
                    data.append(items)
        return data

    def create_C1(data):
        """Build the candidate 1-itemsets.

        Args:
            data: Iterable of transactions, each an iterable of hashable items.

        Returns:
            A set containing one single-element frozenset for every distinct
            item that occurs in any transaction.
        """
        return {frozenset([element]) for row in data for element in row}

    def is_apriori(Ck_item, Lksub1):
        """Check a candidate itemset against the Apriori pruning rule.

        Args:
            Ck_item: Candidate itemset (a frozenset).
            Lksub1: Collection of itemsets the candidate's subsets are looked
                up in.

        Returns:
            True when every subset obtained by removing exactly one element
            from ``Ck_item`` is a member of ``Lksub1`` (vacuously True for an
            empty candidate); False otherwise.
        """
        return all(
            (Ck_item - frozenset([member])) in Lksub1
            for member in Ck_item
        )

    def create_Ck(Lksub1, k):
        """Generate candidate k-itemsets from a set of (k-1)-itemsets.

        Two itemsets from ``Lksub1`` are joined when the first ``k - 2`` items
        of their sorted forms are equal. The union is kept only if it passes
        ``is_apriori`` against ``Lksub1``.

        Args:
            Lksub1: Set of frozensets to join (items must be mutually
                sortable).
            k: Size of the candidates being generated.

        Returns:
            A set of frozensets: the candidates that survived pruning.
        """
        Ck = set()
        list_Lksub1 = list(Lksub1)
        len_Lksub1 = len(list_Lksub1)
        # Sort every itemset once up front rather than re-sorting both
        # operands on each iteration of the nested loop.
        prefixes = [sorted(itemset)[0:k - 2] for itemset in list_Lksub1]
        for i in range(len_Lksub1):
            # Visit each unordered pair once: the union is symmetric, and
            # joining an itemset with itself only reproduces that itemset,
            # which is not a k-item candidate.
            for j in range(i + 1, len_Lksub1):
                if prefixes[i] == prefixes[j]:
                    Ck_item = list_Lksub1[i] | list_Lksub1[j]
                    if is_apriori(Ck_item, Lksub1):
                        Ck.add(Ck_item)
        return Ck

    def generate_Lk_by_Ck(data, Ck, min_support):
        """Filter candidate itemsets by minimum support.

        Counts, for every candidate in ``Ck``, the transactions of ``data``
        that contain it, and keeps the candidates whose support
        (count / number of transactions) is at least ``min_support``.

        Args:
            data: Sized collection of transactions; each transaction is an
                iterable of items.
            Ck: Iterable of candidate itemsets (frozensets).
            min_support: Minimum support as a fraction of the transactions.

        Returns:
            A ``(Lk, support_data)`` tuple, which is the shape ``apriori``
            unpacks: ``Lk`` is the set of frequent itemsets and
            ``support_data`` maps each of them to its support.
        """
        len_data = len(data)
        item_count = {}
        for transaction in data:
            for item in Ck:
                if item.issubset(transaction):
                    item_count[item] = item_count.get(item, 0) + 1

        # item_count is empty when data is empty, so the division below is
        # never reached with len_data == 0.
        support_data = {}
        for item, count in item_count.items():
            support = count / len_data
            if support >= min_support:
                support_data[item] = support

        return set(support_data), support_data

    def apriori(data, min_support=0.5):
        """Find all frequent itemsets of ``data`` level by level.

        Starts from the frequent 1-itemsets and repeatedly builds candidates
        of the next size with ``create_Ck`` and filters them with
        ``generate_Lk_by_Ck``, stopping at the first size that has no
        frequent itemset. ``generate_Lk_by_Ck`` is expected to return a
        ``(frequent_itemsets, support_dict)`` pair.

        Args:
            data: Iterable of transactions, each an iterable of hashable
                items.
            min_support: Minimum support as a fraction of the transactions.

        Returns:
            A ``(L, support_data)`` tuple. ``L`` is a list whose element at
            index ``n`` is the set of frequent ``(n + 1)``-itemsets (the
            first entry is always present, even if empty). ``support_data``
            maps every frequent itemset of every size to its support.
        """
        C1 = create_C1(data)
        # Convert transactions to sets once so subset tests are cheap.
        D = list(map(set, data))
        L1, support_data = generate_Lk_by_Ck(D, C1, min_support)
        Lksub1 = L1.copy()
        L = [Lksub1]
        i = 2
        while True:
            Ci = create_Ck(Lksub1, i)
            Li, supK = generate_Lk_by_Ck(D, Ci, min_support)
            if not Li:
                break
            # Keep the supports of this level too; otherwise the returned
            # mapping would only describe the 1-itemsets.
            support_data.update(supK)
            Lksub1 = Li.copy()
            L.append(Lksub1)
            i += 1
        return L, support_data

    if __name__ == '__main__':
        # Script entry point: load the transactions, mine the frequent
        # itemsets with the default minimum support, and print the results.
        # Replace the placeholder below with the path to your data file.
        file_path = 'your_file_path.csv'
        L, support_data = apriori(load_data(file_path))
        # The printed labels read "frequent itemsets" and "support data".
        print("频繁项集:", L)
        print("支持度数据:", support_data)

  • 相关阅读:
    [spring]spring的使用笔记大全
    微服务不是问题,无能才是!
    android: Preferences DataStore 和 Proto DataStore use guide
    分布式原理
    随手记录:自家小米路由器配置了哪些东东以备后用
    【Java】Math 类
    IDEA实现远程Debug调试
    ChatGPT WPS AI 一键制作各类汇报型PPT演示文档
    Spring Cloud Alibaba系列之nacos:(3)服务集成Nacos
    Python 中 staticmethod 和 classmethod 原理
  • 原文地址:https://blog.csdn.net/2301_78263023/article/details/134084868