Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
7e26d2cf
Commit
7e26d2cf
authored
Jun 26, 2015
by
Davis King
Browse files
Options
Browse Files
Download
Plain Diff
merged
parents
c590ee67
2c8b159e
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
292 additions
and
0 deletions
+292
-0
clustering.h
dlib/clustering.h
+1
-0
bottom_up_cluster.h
dlib/clustering/bottom_up_cluster.h
+138
-0
bottom_up_cluster_abstract.h
dlib/clustering/bottom_up_cluster_abstract.h
+51
-0
clustering.cpp
dlib/test/clustering.cpp
+85
-0
ml.xml
docs/docs/ml.xml
+13
-0
term_index.xml
docs/docs/term_index.xml
+1
-0
other.cpp
tools/python/src/other.cpp
+3
-0
No files found.
dlib/clustering.h
View file @
7e26d2cf
...
...
@@ -6,6 +6,7 @@
#include "clustering/modularity_clustering.h"
#include "clustering/chinese_whispers.h"
#include "clustering/spectral_cluster.h"
#include "clustering/bottom_up_cluster.h"
#include "svm/kkmeans.h"
#endif // DLIB_CLuSTERING_
...
...
dlib/clustering/bottom_up_cluster.h
0 → 100644
View file @
7e26d2cf
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_BOTTOM_uP_CLUSTER_Hh_
#define DLIB_BOTTOM_uP_CLUSTER_Hh_
#include <queue>
#include <map>
#include "bottom_up_cluster_abstract.h"
#include "../algs.h"
#include "../matrix.h"
#include "../disjoint_subsets.h"
#include "../graph_utils.h"
namespace
dlib
{
// ----------------------------------------------------------------------------------------
namespace
buc_impl
{
inline
void
merge_sets
(
matrix
<
double
>&
dists
,
unsigned
long
dest
,
unsigned
long
src
)
{
for
(
long
r
=
0
;
r
<
dists
.
nr
();
++
r
)
dists
(
dest
,
r
)
=
dists
(
r
,
dest
)
=
std
::
max
(
dists
(
r
,
dest
),
dists
(
r
,
src
));
}
struct
compare_dist
{
bool
operator
()
(
const
sample_pair
&
a
,
const
sample_pair
&
b
)
const
{
return
a
.
distance
()
>
b
.
distance
();
}
};
}
// ----------------------------------------------------------------------------------------
template
<
typename
EXP
>
unsigned
long
bottom_up_cluster
(
const
matrix_exp
<
EXP
>&
dists_
,
std
::
vector
<
unsigned
long
>&
labels
,
unsigned
long
min_num_clusters
,
double
max_dist
=
std
::
numeric_limits
<
double
>::
infinity
()
)
{
matrix
<
double
>
dists
=
matrix_cast
<
double
>
(
dists_
);
// make sure requires clause is not broken
DLIB_CASSERT
(
dists
.
nr
()
==
dists
.
nc
()
&&
min_num_clusters
>
0
,
"
\t
unsigned long bottom_up_cluster()"
<<
"
\n\t
Invalid inputs were given to this function."
<<
"
\n\t
dists.nr(): "
<<
dists
.
nr
()
<<
"
\n\t
dists.nc(): "
<<
dists
.
nc
()
<<
"
\n\t
min_num_clusters: "
<<
min_num_clusters
);
using
namespace
buc_impl
;
labels
.
resize
(
dists
.
nr
());
disjoint_subsets
sets
;
sets
.
set_size
(
dists
.
nr
());
if
(
labels
.
size
()
==
0
)
return
0
;
// push all the edges in the graph into a priority queue so the best edges to merge
// come first.
std
::
priority_queue
<
sample_pair
,
std
::
vector
<
sample_pair
>
,
compare_dist
>
que
;
for
(
long
r
=
0
;
r
<
dists
.
nr
();
++
r
)
for
(
long
c
=
r
+
1
;
c
<
dists
.
nc
();
++
c
)
que
.
push
(
sample_pair
(
r
,
c
,
dists
(
r
,
c
)));
// Now start merging nodes.
for
(
unsigned
long
iter
=
min_num_clusters
;
iter
<
sets
.
size
();
++
iter
)
{
// find the next best thing to merge.
double
best_dist
=
que
.
top
().
distance
();
unsigned
long
a
=
sets
.
find_set
(
que
.
top
().
index1
());
unsigned
long
b
=
sets
.
find_set
(
que
.
top
().
index2
());
que
.
pop
();
// we have been merging and modifying the distances, so make sure this distance
// is still valid and these guys haven't been merged already.
while
(
a
==
b
||
best_dist
<
dists
(
a
,
b
))
{
// Haven't merged it yet, so put it back in with updated distance for
// reconsideration later.
if
(
a
!=
b
)
que
.
push
(
sample_pair
(
a
,
b
,
dists
(
a
,
b
)));
best_dist
=
que
.
top
().
distance
();
a
=
sets
.
find_set
(
que
.
top
().
index1
());
b
=
sets
.
find_set
(
que
.
top
().
index2
());
que
.
pop
();
}
// now merge these sets if the best distance is small enough
if
(
best_dist
>
max_dist
)
break
;
unsigned
long
news
=
sets
.
merge_sets
(
a
,
b
);
unsigned
long
olds
=
(
news
==
a
)
?
b
:
a
;
merge_sets
(
dists
,
news
,
olds
);
}
// figure out which cluster each element is in. Also make sure the labels are
// contiguous.
std
::
map
<
unsigned
long
,
unsigned
long
>
relabel
;
for
(
unsigned
long
r
=
0
;
r
<
labels
.
size
();
++
r
)
{
unsigned
long
l
=
sets
.
find_set
(
r
);
// relabel to make contiguous
if
(
relabel
.
count
(
l
)
==
0
)
{
unsigned
long
next
=
relabel
.
size
();
relabel
[
l
]
=
next
;
}
labels
[
r
]
=
relabel
[
l
];
}
return
relabel
.
size
();
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_BOTTOM_uP_CLUSTER_Hh_
dlib/clustering/bottom_up_cluster_abstract.h
0 → 100644
View file @
7e26d2cf
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
#ifdef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
#include "../matrix.h"
namespace
dlib
{
// ----------------------------------------------------------------------------------------
template
<
typename
EXP
>
unsigned
long
bottom_up_cluster
(
const
matrix_exp
<
EXP
>&
dists
,
std
::
vector
<
unsigned
long
>&
labels
,
unsigned
long
min_num_clusters
,
double
max_dist
=
std
::
numeric_limits
<
double
>::
infinity
()
);
/*!
requires
- dists.nr() == dists.nc()
- min_num_clusters > 0
- dists == trans(dists)
(l.e. dists should be symmetric)
ensures
- Runs a bottom up agglomerative clustering algorithm.
- Interprets dists as a matrix that gives the distances between dists.nr()
items. In particular, we take dists(i,j) to be the distance between the ith
and jth element of some set. This function clusters the elements of this set
into at least min_num_clusters (or dists.nr() if there aren't enough
elements). Additionally, within each cluster, the maximum pairwise distance
between any two cluster elements is <= max_dist.
- returns the number of clusters found.
- #labels.size() == dists.nr()
- for all valid i:
- #labels[i] == the cluster ID of the node with index i (i.e. the node
corresponding to the distances dists(i,*)).
- 0 <= #labels[i] < the number of clusters found
(i.e. cluster IDs are assigned contiguously and start at 0)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
dlib/test/clustering.cpp
View file @
7e26d2cf
...
...
@@ -220,6 +220,89 @@ namespace
}
}
void
test_bottom_up_clustering
()
{
std
::
vector
<
dpoint
>
pts
;
pts
.
push_back
(
dpoint
(
0.0
,
0.0
));
pts
.
push_back
(
dpoint
(
0.5
,
0.0
));
pts
.
push_back
(
dpoint
(
0.5
,
0.5
));
pts
.
push_back
(
dpoint
(
0.0
,
0.5
));
pts
.
push_back
(
dpoint
(
3.0
,
3.0
));
pts
.
push_back
(
dpoint
(
3.5
,
3.0
));
pts
.
push_back
(
dpoint
(
3.5
,
3.5
));
pts
.
push_back
(
dpoint
(
3.0
,
3.5
));
pts
.
push_back
(
dpoint
(
7.0
,
7.0
));
pts
.
push_back
(
dpoint
(
7.5
,
7.0
));
pts
.
push_back
(
dpoint
(
7.5
,
7.5
));
pts
.
push_back
(
dpoint
(
7.0
,
7.5
));
matrix
<
double
>
dists
(
pts
.
size
(),
pts
.
size
());
for
(
long
r
=
0
;
r
<
dists
.
nr
();
++
r
)
for
(
long
c
=
0
;
c
<
dists
.
nc
();
++
c
)
dists
(
r
,
c
)
=
length
(
pts
[
r
]
-
pts
[
c
]);
matrix
<
unsigned
long
,
0
,
1
>
truth
(
12
);
truth
=
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
;
std
::
vector
<
unsigned
long
>
labels
;
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
3
)
==
3
);
DLIB_TEST
(
mat
(
labels
)
==
truth
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
,
4.0
)
==
3
);
DLIB_TEST
(
mat
(
labels
)
==
truth
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
,
4.95
)
==
2
);
truth
=
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
;
DLIB_TEST
(
mat
(
labels
)
==
truth
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
)
==
1
);
truth
=
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
;
DLIB_TEST
(
mat
(
labels
)
==
truth
);
dists
.
set_size
(
0
,
0
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
3
)
==
0
);
DLIB_TEST
(
labels
.
size
()
==
0
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
)
==
0
);
DLIB_TEST
(
labels
.
size
()
==
0
);
dists
.
set_size
(
1
,
1
);
dists
=
1
;
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
3
)
==
1
);
DLIB_TEST
(
labels
.
size
()
==
1
);
DLIB_TEST
(
labels
[
0
]
==
0
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
)
==
1
);
DLIB_TEST
(
labels
.
size
()
==
1
);
DLIB_TEST
(
labels
[
0
]
==
0
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
,
0
)
==
1
);
DLIB_TEST
(
labels
.
size
()
==
1
);
DLIB_TEST
(
labels
[
0
]
==
0
);
dists
.
set_size
(
2
,
2
);
dists
=
1
;
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
3
)
==
2
);
DLIB_TEST
(
labels
.
size
()
==
2
);
DLIB_TEST
(
labels
[
0
]
==
0
);
DLIB_TEST
(
labels
[
1
]
==
1
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
)
==
1
);
DLIB_TEST
(
labels
.
size
()
==
2
);
DLIB_TEST
(
labels
[
0
]
==
0
);
DLIB_TEST
(
labels
[
1
]
==
0
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
,
1
)
==
1
);
DLIB_TEST
(
labels
.
size
()
==
2
);
DLIB_TEST
(
labels
[
0
]
==
0
);
DLIB_TEST
(
labels
[
1
]
==
0
);
DLIB_TEST
(
bottom_up_cluster
(
dists
,
labels
,
1
,
0.999
)
==
2
);
DLIB_TEST
(
labels
.
size
()
==
2
);
DLIB_TEST
(
labels
[
0
]
==
0
);
DLIB_TEST
(
labels
[
1
]
==
1
);
}
class
test_clustering
:
public
tester
{
public
:
...
...
@@ -232,6 +315,8 @@ namespace
void
perform_test
(
)
{
test_bottom_up_clustering
();
dlib
::
rand
rnd
;
std
::
vector
<
sample_pair
>
edges
;
...
...
docs/docs/ml.xml
View file @
7e26d2cf
...
...
@@ -115,6 +115,7 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
<item>
newman_cluster
</item>
<item>
spectral_cluster
</item>
<item>
chinese_whispers
</item>
<item>
bottom_up_cluster
</item>
<item>
modularity
</item>
</section>
<section>
...
...
@@ -320,6 +321,18 @@ Davis E. King. <a href="http://jmlr.csail.mit.edu/papers/volume10/king09a/king09
</component>
<!-- ************************************************************************* -->
<component>
<name>
bottom_up_cluster
</name>
<file>
dlib/clustering.h
</file>
<spec_file
link=
"true"
>
dlib/clustering/bottom_up_cluster_abstract.h
</spec_file>
<description>
This function runs a bottom up agglomerative clustering algorithm.
</description>
</component>
<!-- ************************************************************************* -->
<component>
...
...
docs/docs/term_index.xml
View file @
7e26d2cf
...
...
@@ -428,6 +428,7 @@
<term
file=
"ml.html"
name=
"newman_cluster"
include=
"dlib/clustering.h"
/>
<term
file=
"ml.html"
name=
"spectral_cluster"
include=
"dlib/clustering.h"
/>
<term
file=
"ml.html"
name=
"chinese_whispers"
include=
"dlib/clustering.h"
/>
<term
file=
"ml.html"
name=
"bottom_up_cluster"
include=
"dlib/clustering.h"
/>
<term
file=
"ml.html"
name=
"modularity"
include=
"dlib/clustering.h"
/>
<term
file=
"ml.html"
name=
"pick_initial_centers"
include=
"dlib/clustering.h"
/>
<term
file=
"ml.html"
name=
"rank_features"
include=
"dlib/svm.h"
/>
...
...
tools/python/src/other.cpp
View file @
7e26d2cf
...
...
@@ -57,6 +57,9 @@ list _max_cost_assignment (
const
matrix
<
double
>&
cost
)
{
if
(
cost
.
nr
()
!=
cost
.
nc
())
throw
dlib
::
error
(
"The input matrix must be square."
);
// max_cost_assignment() only works with integer matrices, so convert from
// double to integer.
const
double
scale
=
(
std
::
numeric_limits
<
dlib
::
int64
>::
max
()
/
1000
)
/
max
(
abs
(
cost
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment