• java 实现布隆过滤器(BloomFilter)


    HashGenerator.java

    1. package BloomFilter;
    2. public class HashGenerator {
    3. private int size;
    4. private int seed;
    5. private HashGroup group;
    6. enum HashGroup {
    7. G1, G2, G3, G4
    8. }
    9. public HashGenerator(int size, int seed, HashGroup group) {
    10. this.size = size;
    11. this.seed = seed;
    12. this.group = group;
    13. }
    14. public int doHash(String value) {
    15. switch (group) {
    16. case G1 -> {return hashG1(value);}
    17. case G2 -> {return hashG2(value);}
    18. case G3 -> {return hashG3(value);}
    19. case G4 -> {return hashG4(value);}
    20. default -> throw new RuntimeException("Err HashGroup is NULL!");
    21. }
    22. }
    23. private int hashG1(String value) {
    24. var hash = 0;
    25. for (var idx = 0; idx < value.length(); idx++) {
    26. var c = value.charAt(idx);
    27. hash = (hash << 5) + hash + c;
    28. hash &= hash;
    29. hash = Math.abs(hash);
    30. }
    31. return hash % (seed + size - 1);
    32. }
    33. private int hashG2(String value) {
    34. var hash = 7397;
    35. for (var idx = 0; idx < value.length(); idx++) {
    36. var c = value.charAt(idx);
    37. hash = (hash << 5) + hash + c;
    38. }
    39. return Math.abs(hash % seed * (size - 1));
    40. }
    41. private int hashG3(String value) {
    42. var hash = 0;
    43. for (var idx = 0; idx < value.length(); idx++) {
    44. var c = value.charAt(idx);
    45. hash = (hash << 5) + hash + c;
    46. hash += c;
    47. hash &= hash;
    48. }
    49. return Math.abs(hash % (seed * size - 1));
    50. }
    51. private int hashG4(String value) {
    52. int h;
    53. return (value == null) ? 0 : Math.abs(seed * (size - 1) & ((h = value.hashCode()) ^ (h >>> 16)));
    54. }
    55. }

    BloomFilter.java

    1. package BloomFilter;
    2. import java.util.BitSet;
    3. public class BloomFilter {
    4. private static final HashGenerator.HashGroup[] GROUPS = new HashGenerator.HashGroup[]{HashGenerator.HashGroup.G1, HashGenerator.HashGroup.G2, HashGenerator.HashGroup.G3, HashGenerator.HashGroup.G4};
    5. private final BitSet bits;
    6. private HashGenerator[] generators;
    7. public BloomFilter(int size, int[] seeds) {
    8. bits = new BitSet(size);
    9. generators = new HashGenerator[seeds.length];
    10. for (var i = 0; i < seeds.length; i++) {
    11. generators[i] = new HashGenerator(size, seeds[i], GROUPS[i % GROUPS.length]);
    12. }
    13. }
    14. public void add(String value) {
    15. for (var generator : generators) {
    16. var hash = generator.doHash(value);
    17. bits.set(hash, true);
    18. }
    19. }
    20. public boolean contains(String value) {
    21. var ret = true;
    22. for (var generator : generators) {
    23. ret = ret && bits.get(generator.doHash(value));
    24. }
    25. return ret;
    26. }
    27. }

    测试代码

    1. public static void main(String[] args) {
    2. String val00 = "小傅哥";
    3. String val01 = "https://bugstack.cn";
    4. String val02 = "https://github.com/fuzhengwei/CodeGuide";
    5. String val03 = "https://github.com/fuzhengwei";
    6. BloomFilter filter = new BloomFilter(Integer.MAX_VALUE, new int[]{7, 19, 43, 77});
    7. filter.add(val00);
    8. filter.add(val01);
    9. filter.add(val02);
    10. System.out.println("测试结果 val00:" + val00 + " 布隆过滤器:" + filter.contains(val00));
    11. System.out.println("测试结果 val01:" + val01 + " 布隆过滤器:" + filter.contains(val01));
    12. System.out.println("测试结果 val02:" + val02 + " 布隆过滤器:" + filter.contains(val02));
    13. System.out.println("测试结果 val02:" + val03 + " 布隆过滤器:" + filter.contains(val03));
    14. }

  • 相关阅读:
    如何在导入的数据库查找api接口
    FFmpeg中的常用结构体分析
    全网监控 nginx 部署 zabbix6.0
    Ts常见报错解决方案
    [答疑]改善系统的性能,用得着业务建模吗
    MySQL 8.0 新特性之 Clone Plugin
    Tesseract .Net SDK C# OCR 2022.1
    Dubbo源码(六) - 服务路由
    【panel】屏EMC展频调试方法
    win11蓝屏DRIVER_VERIFIER_DMA_VIOLATION?
  • 原文地址:https://blog.csdn.net/fanghailiang2016/article/details/138104951