• DBSCAN算法c++实现


     首先计算出距离每个点e以内的点,如果个数>=minPts,则找出它的直接密度可达和间接可达的点,用visited标记点是否已经在簇中,循环直到最后一个点。

    1. #include
    2. #include
    3. #include
    4. #include
    5. #include
    6. #include
    7. using namespace std;
    8. struct Point
    9. {
    10. double x;
    11. double y;
    12. };
    13. double distance(const Point& a, const Point& b)
    14. {
    15. return sqrt(pow(a.x - b.x,2) + pow(a.y - b.y,2));
    16. }
    17. vector int>> getPointSet(const vector data, double e)
    18. {
    19. vectorint>> PointSet(data.size()); //用到了PointSet[i],要先进行初始化
    20. //vector> PointSet;
    21. for(int i=0;isize();i++)
    22. {
    23. for (int j = 0; j < data.size(); j++)
    24. {
    25. if (distance(data[i], data[j]) <= e)
    26. {
    27. PointSet[i].insert(j); //j还是j+1
    28. }
    29. }
    30. }
    31. return PointSet;
    32. }
    33. vectorint>> DAIANA(vectorint>> point,int MinPts)
    34. {
    35. vector<bool> visted(point.size(),false);
    36. vectorint>> cluster;
    37. for (int i = 0; i < point.size(); i++)
    38. {
    39. if (!visted[i])
    40. {
    41. if (point[i].size() >= MinPts) //{几个}
    42. {
    43. for (set<int>::const_iterator j = point[i].begin(); j != point[i].end(); j++)
    44. {
    45. visted[*j] = true;
    46. if (point[*j].size() >= MinPts) //间接相连
    47. {
    48. for (auto k = point[*j].begin(); k != point[*j].end(); k++)
    49. {
    50. visted[*k] = true;
    51. point[i].insert(*k);
    52. }
    53. //point[i].insert(point[*j].begin(), point[*j].end());
    54. }
    55. }
    56. cluster.push_back(point[i]);
    57. }
    58. }
    59. }
    60. return cluster;
    61. }
    62. vector ReadData(string filename)
    63. {
    64. vector data;
    65. ifstream file(filename);
    66. if (file.is_open())
    67. {
    68. string line;
    69. while (getline(file, line))
    70. {
    71. istringstream iss(line);
    72. double x, y;
    73. string token;
    74. Point point;
    75. if (getline(iss, token, ',') && istringstream(token) >> point.x &&
    76. getline(iss, token, ',') && istringstream(token) >> point.y) {
    77. data.push_back(point);
    78. }
    79. }
    80. }
    81. else
    82. {
    83. cout << "open fail";
    84. }
    85. file.close();
    86. return data;
    87. }
    88. int main()
    89. {
    90. vector dataset = ReadData("data.txt");
    91. double e;
    92. int MinPts;
    93. cin >> e >> MinPts;
    94. vector int>> point;
    95. vectorint>> cluster;
    96. point=getPointSet(dataset, e);
    97. cluster=DAIANA(point, MinPts);
    98. for (int i = 0; i < cluster.size(); i++)
    99. {
    100. cout << "{";
    101. int count = 0;
    102. for (auto j = cluster[i].begin(); j != cluster[i].end(); j++)
    103. {
    104. cout << *j+1 ;
    105. if (++count < cluster[i].size())
    106. {
    107. cout << ",";
    108. }
    109. }
    110. cout << "}" << endl;
    111. }
    112. }

    或 

    判断是否是核心点,如果是,将它的直接密度可达点放进set neighbors;再判断这些直接密度可达点里如果有核心点则又将该直接密度可达点放进set subNeighbors,最后neighbors.insert(subNeighbors.begin(), subNeighbors.end());

    1. std::vector> dbscan(const std::vector& dataset, double epsilon, int minPts) {
    2. std::vector> clusters;
    3. std::vector<bool> visited(dataset.size(), false);
    4. for (int i = 0; i < dataset.size(); ++i) {
    5. if (visited[i]) continue;
    6. visited[i] = true;
    7. std::vector cluster;
    8. std::set<int> neighbors;
    9. for (int j = 0; j < dataset.size(); ++j) {
    10. if (calculateDistance(dataset[i], dataset[j]) <= epsilon) {
    11. neighbors.insert(j);
    12. }
    13. }
    14. if (neighbors.size() >= minPts) {
    15. for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
    16. int index = *it;
    17. if (!visited[index]) {
    18. visited[index] = true;
    19. std::set<int> subNeighbors;
    20. for (int j = 0; j < dataset.size(); ++j) {
    21. if (calculateDistance(dataset[index], dataset[j]) <= epsilon) {
    22. subNeighbors.insert(j);
    23. }
    24. }
    25. if (subNeighbors.size() >= minPts) {
    26. neighbors.insert(subNeighbors.begin(), subNeighbors.end());
    27. }
    28. }
    29. }
    30. }
    31. for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
    32. cluster.push_back(dataset[*it]);
    33. }
    34. if (!cluster.empty()) {
    35. clusters.push_back(cluster);
    36. }
    37. }
    38. return clusters;
    39. }

    数据集:参考数据挖掘原理与算法第四版DBSCAN例子

    1.0, 0.0
    4.0, 0.0
    0.0, 1.0
    1.0, 1.0
    2.0, 1.0
    3.0,1.0    
    4.0,1.0    
    5.0,1.0    
    0.0,2.0    
    1.0,2.0    
    4.0,2.0    
    1.0,3.0

    结果

  • 相关阅读:
    手把手Java爬虫教学 - 8. 项目2 - 数据库表设计 & 爬虫代码实现
    Linux设备树插件
    nifi从入门到实战(保姆级教程)——flow
    Leetcode 1662. 检查两个字符串数组是否相等
    基于C#的超市进销存管理系统设计与实现
    第八节:类和对象【二】【this引用和包】
    Zellij-一个典型的 Rust程序的性能优化案例
    java毕业设计乡镇卫生院信息管理mybatis+源码+调试部署+系统+数据库+lw
    有什么好用的IT资产管理软件
    CAN总线数据采集工具PCAN的使用教程
  • 原文地址:https://blog.csdn.net/m0_70732442/article/details/134082366