首先计算出距离每个点e以内的点,如果个数>=minPts,则找出它的直接密度可达和间接可达的点,用visited标记点是否已经在簇中,循环直到最后一个点。
- #include
- #include
- #include
- #include
- #include
- #include
- using namespace std;
-
- struct Point
- {
- double x;
- double y;
- };
-
- double distance(const Point& a, const Point& b)
- {
- return sqrt(pow(a.x - b.x,2) + pow(a.y - b.y,2));
- }
-
- vector
int>> getPointSet(const vector data, double e) - {
- vector
int>> PointSet(data.size()); //用到了PointSet[i],要先进行初始化 - //vector
> PointSet; - for(int i=0;i
size();i++) - {
- for (int j = 0; j < data.size(); j++)
- {
- if (distance(data[i], data[j]) <= e)
- {
- PointSet[i].insert(j); //j还是j+1
- }
- }
- }
- return PointSet;
- }
-
- vector
int>> DAIANA(vectorint>> point,int MinPts) - {
- vector<bool> visted(point.size(),false);
- vector
int>> cluster; - for (int i = 0; i < point.size(); i++)
- {
- if (!visted[i])
- {
- if (point[i].size() >= MinPts) //{几个}
- {
- for (set<int>::const_iterator j = point[i].begin(); j != point[i].end(); j++)
- {
- visted[*j] = true;
- if (point[*j].size() >= MinPts) //间接相连
- {
- for (auto k = point[*j].begin(); k != point[*j].end(); k++)
- {
- visted[*k] = true;
- point[i].insert(*k);
- }
- //point[i].insert(point[*j].begin(), point[*j].end());
- }
- }
- cluster.push_back(point[i]);
- }
- }
-
- }
- return cluster;
- }
-
-
- vector
ReadData(string filename) - {
- vector
data; - ifstream file(filename);
- if (file.is_open())
- {
- string line;
- while (getline(file, line))
- {
- istringstream iss(line);
- double x, y;
- string token;
- Point point;
- if (getline(iss, token, ',') && istringstream(token) >> point.x &&
- getline(iss, token, ',') && istringstream(token) >> point.y) {
- data.push_back(point);
- }
- }
- }
- else
- {
- cout << "open fail";
- }
- file.close();
- return data;
- }
-
- int main()
- {
- vector
dataset = ReadData("data.txt"); - double e;
- int MinPts;
- cin >> e >> MinPts;
-
- vector
int>> point; - vector
int>> cluster; - point=getPointSet(dataset, e);
- cluster=DAIANA(point, MinPts);
- for (int i = 0; i < cluster.size(); i++)
- {
- cout << "{";
- int count = 0;
- for (auto j = cluster[i].begin(); j != cluster[i].end(); j++)
- {
-
- cout << *j+1 ;
- if (++count < cluster[i].size())
- {
- cout << ",";
- }
- }
- cout << "}" << endl;
- }
- }
-
或
判断是否是核心点,如果是,将它的直接密度可达点放进set
- std::vector
> dbscan(const std::vector& dataset, double epsilon, int minPts) { - std::vector
> clusters; - std::vector<bool> visited(dataset.size(), false);
-
- for (int i = 0; i < dataset.size(); ++i) {
- if (visited[i]) continue;
-
- visited[i] = true;
- std::vector
cluster; -
- std::set<int> neighbors;
- for (int j = 0; j < dataset.size(); ++j) {
- if (calculateDistance(dataset[i], dataset[j]) <= epsilon) {
- neighbors.insert(j);
- }
- }
-
- if (neighbors.size() >= minPts) {
- for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
- int index = *it;
- if (!visited[index]) {
- visited[index] = true;
- std::set<int> subNeighbors;
-
- for (int j = 0; j < dataset.size(); ++j) {
- if (calculateDistance(dataset[index], dataset[j]) <= epsilon) {
- subNeighbors.insert(j);
- }
- }
-
- if (subNeighbors.size() >= minPts) {
- neighbors.insert(subNeighbors.begin(), subNeighbors.end());
- }
- }
- }
- }
-
- for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
- cluster.push_back(dataset[*it]);
- }
-
- if (!cluster.empty()) {
- clusters.push_back(cluster);
- }
- }
-
- return clusters;
- }
数据集:参考数据挖掘原理与算法第四版DBSCAN例子
1.0, 0.0
4.0, 0.0
0.0, 1.0
1.0, 1.0
2.0, 1.0
3.0,1.0
4.0,1.0
5.0,1.0
0.0,2.0
1.0,2.0
4.0,2.0
1.0,3.0
结果
