Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
986273f2
Commit
986273f2
authored
Jul 01, 2015
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added find_clusters_using_angular_kmeans()
parent
2c8b159e
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
221 additions
and
18 deletions
+221
-18
kkmeans.h
dlib/svm/kkmeans.h
+132
-0
kkmeans_abstract.h
dlib/svm/kkmeans_abstract.h
+41
-1
kmeans.cpp
dlib/test/kmeans.cpp
+48
-17
No files found.
dlib/svm/kkmeans.h
View file @
986273f2
...
@@ -477,6 +477,138 @@ namespace dlib
...
@@ -477,6 +477,138 @@ namespace dlib
}
}
}
}
}
// ----------------------------------------------------------------------------------------
template
<
typename
array_type
,
typename
sample_type
,
typename
alloc
>
void
find_clusters_using_angular_kmeans
(
const
array_type
&
samples
,
std
::
vector
<
sample_type
,
alloc
>&
centers
,
unsigned
long
max_iter
=
1000
)
{
// make sure requires clause is not broken
DLIB_ASSERT
(
samples
.
size
()
>
0
&&
centers
.
size
()
>
0
,
"
\t
void find_clusters_using_angular_kmeans()"
<<
"
\n\t
You passed invalid arguments to this function"
<<
"
\n\t
samples.size(): "
<<
samples
.
size
()
<<
"
\n\t
centers.size(): "
<<
centers
.
size
()
);
#ifdef ENABLE_ASSERTS
{
const
long
nr
=
samples
[
0
].
nr
();
const
long
nc
=
samples
[
0
].
nc
();
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
DLIB_ASSERT
(
is_vector
(
samples
[
i
])
&&
samples
[
i
].
nr
()
==
nr
&&
samples
[
i
].
nc
()
==
nc
,
"
\t
void find_clusters_using_angular_kmeans()"
<<
"
\n\t
You passed invalid arguments to this function"
<<
"
\n\t
is_vector(samples[i]): "
<<
is_vector
(
samples
[
i
])
<<
"
\n\t
samples[i].nr(): "
<<
samples
[
i
].
nr
()
<<
"
\n\t
nr: "
<<
nr
<<
"
\n\t
samples[i].nc(): "
<<
samples
[
i
].
nc
()
<<
"
\n\t
nc: "
<<
nc
<<
"
\n\t
i: "
<<
i
);
}
}
#endif
typedef
typename
sample_type
::
type
scalar_type
;
sample_type
zero
(
centers
[
0
]);
set_all_elements
(
zero
,
0
);
unsigned
long
seed
=
0
;
// tells which center a sample belongs to
std
::
vector
<
unsigned
long
>
assignments
(
samples
.
size
(),
samples
.
size
());
std
::
vector
<
double
>
lengths
;
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
lengths
.
push_back
(
length
(
samples
[
i
]));
// If there are zero vectors in samples then just say their length is 1 so we
// can avoid a division by zero check later on. Also, this doesn't matter
// since zero vectors can be assigned to any cluster randomly as there is no
// basis for picking one based on angle.
if
(
lengths
.
back
()
==
0
)
lengths
.
back
()
=
1
;
}
// We will keep the centers as unit vectors at all times throughout the processing.
for
(
unsigned
long
i
=
0
;
i
<
centers
.
size
();
++
i
)
{
double
len
=
length
(
centers
[
i
]);
// Avoid having length 0 centers. If that is the case then pick another center
// at random.
while
(
len
==
0
)
{
centers
[
i
]
=
matrix_cast
<
scalar_type
>
(
gaussian_randm
(
centers
[
i
].
nr
(),
centers
[
i
].
nc
(),
seed
++
));
len
=
length
(
centers
[
i
]);
}
centers
[
i
]
/=
len
;
}
unsigned
long
iter
=
0
;
bool
centers_changed
=
true
;
while
(
centers_changed
&&
iter
<
max_iter
)
{
++
iter
;
centers_changed
=
false
;
// loop over each sample and see which center it is closest to
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
// find the best center for sample[i]
scalar_type
best_angle
=
std
::
numeric_limits
<
scalar_type
>::
max
();
unsigned
long
best_center
=
0
;
for
(
unsigned
long
j
=
0
;
j
<
centers
.
size
();
++
j
)
{
scalar_type
angle
=
-
dot
(
centers
[
j
],
samples
[
i
])
/
lengths
[
i
];
if
(
angle
<
best_angle
)
{
best_angle
=
angle
;
best_center
=
j
;
}
}
if
(
assignments
[
i
]
!=
best_center
)
{
centers_changed
=
true
;
assignments
[
i
]
=
best_center
;
}
}
// now update all the centers
centers
.
assign
(
centers
.
size
(),
zero
);
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
centers
[
assignments
[
i
]]
+=
samples
[
i
];
}
// Now length normalize all the centers.
for
(
unsigned
long
i
=
0
;
i
<
centers
.
size
();
++
i
)
{
double
len
=
length
(
centers
[
i
]);
// Avoid having length 0 centers. If that is the case then pick another center
// at random.
while
(
len
==
0
)
{
centers
[
i
]
=
matrix_cast
<
scalar_type
>
(
gaussian_randm
(
centers
[
i
].
nr
(),
centers
[
i
].
nc
(),
seed
++
));
len
=
length
(
centers
[
i
]);
centers_changed
=
true
;
}
centers
[
i
]
/=
len
;
}
}
}
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
...
dlib/svm/kkmeans_abstract.h
View file @
986273f2
...
@@ -283,7 +283,7 @@ namespace dlib
...
@@ -283,7 +283,7 @@ namespace dlib
- centers.size() > 0
- centers.size() > 0
- array_type == something with an interface compatible with std::vector
- array_type == something with an interface compatible with std::vector
and it must contain row or column vectors capable of being stored in
and it must contain row or column vectors capable of being stored in
sample_type objects
sample_type objects
.
- sample_type == a dlib::matrix capable of representing vectors
- sample_type == a dlib::matrix capable of representing vectors
ensures
ensures
- performs regular old linear kmeans clustering on the samples. The clustering
- performs regular old linear kmeans clustering on the samples. The clustering
...
@@ -293,6 +293,46 @@ namespace dlib
...
@@ -293,6 +293,46 @@ namespace dlib
terminates.
terminates.
!*/
!*/
// ----------------------------------------------------------------------------------------
template <
    typename array_type,
    typename sample_type,
    typename alloc
    >
void find_clusters_using_angular_kmeans (
    const array_type& samples,
    std::vector<sample_type, alloc>& centers,
    unsigned long max_iter = 1000
);
/*!
    requires
        - samples.size() > 0
        - samples == a bunch of row or column vectors and they all must be of the
          same length.
        - centers.size() > 0
        - array_type == something with an interface compatible with std::vector
          and it must contain row or column vectors capable of being stored in
          sample_type objects.
        - sample_type == a dlib::matrix capable of representing vectors
    ensures
        - performs linear kmeans clustering on the samples, except instead of using
          Euclidean distance to compare samples to the centers it uses the angle
          between a sample and a center (with respect to the origin).  So we try to
          cluster samples together if they have small angles with respect to each
          other.  The clustering begins with the initial set of centers given as an
          argument to this function.  When it finishes #centers will contain the
          resulting centers.
        - for all valid i:
            - length(#centers[i]) == 1
              (i.e. the output centers are scaled to be unit vectors since their
              magnitude is irrelevant.  Moreover, this makes it so you can use
              functions like nearest_center() with #centers to find the cluster
              assignments.)
        - Samples of length 0 are allowed.  They have no direction, so there is no
          basis for preferring one cluster over another and the cluster they end
          up in carries no meaning.
        - Any center of length 0 (whether it was supplied that way or came out that
          way because the samples assigned to it sum to the zero vector) is
          replaced with a randomly generated vector before being scaled to unit
          length.
        - No more than max_iter iterations will be performed before this function
          terminates.  If max_iter == 0 then the only thing done to centers is the
          scaling to unit length described above.
!*/
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template
<
template
<
...
...
dlib/test/kmeans.cpp
View file @
986273f2
...
@@ -44,32 +44,63 @@ namespace
...
@@ -44,32 +44,63 @@ namespace
randomize_samples
(
samples
);
randomize_samples
(
samples
);
std
::
vector
<
sample_type
>
centers
;
{
pick_initial_centers
(
seed_centers
.
size
(),
centers
,
samples
,
linear_kernel
<
sample_type
>
());
std
::
vector
<
sample_type
>
centers
;
pick_initial_centers
(
seed_centers
.
size
(),
centers
,
samples
,
linear_kernel
<
sample_type
>
());
find_clusters_using_kmeans
(
samples
,
centers
);
find_clusters_using_kmeans
(
samples
,
centers
);
DLIB_TEST
(
centers
.
size
()
==
seed_centers
.
size
());
DLIB_TEST
(
centers
.
size
()
==
seed_centers
.
size
());
std
::
vector
<
int
>
hits
(
centers
.
size
(),
0
);
std
::
vector
<
int
>
hits
(
centers
.
size
(),
0
);
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
unsigned
long
best_idx
=
0
;
double
best_dist
=
1e100
;
for
(
unsigned
long
j
=
0
;
j
<
centers
.
size
();
++
j
)
{
{
if
(
length
(
samples
[
i
]
-
centers
[
j
])
<
best_dist
)
unsigned
long
best_idx
=
0
;
double
best_dist
=
1e100
;
for
(
unsigned
long
j
=
0
;
j
<
centers
.
size
();
++
j
)
{
{
best_dist
=
length
(
samples
[
i
]
-
centers
[
j
]);
if
(
length
(
samples
[
i
]
-
centers
[
j
])
<
best_dist
)
best_idx
=
j
;
{
best_dist
=
length
(
samples
[
i
]
-
centers
[
j
]);
best_idx
=
j
;
}
}
}
hits
[
best_idx
]
++
;
}
}
hits
[
best_idx
]
++
;
}
for
(
unsigned
long
i
=
0
;
i
<
hits
.
size
();
++
i
)
for
(
unsigned
long
i
=
0
;
i
<
hits
.
size
();
++
i
)
{
DLIB_TEST
(
hits
[
i
]
==
250
);
}
}
{
{
DLIB_TEST
(
hits
[
i
]
==
250
);
std
::
vector
<
sample_type
>
centers
;
pick_initial_centers
(
seed_centers
.
size
(),
centers
,
samples
,
linear_kernel
<
sample_type
>
());
find_clusters_using_angular_kmeans
(
samples
,
centers
);
DLIB_TEST
(
centers
.
size
()
==
seed_centers
.
size
());
std
::
vector
<
int
>
hits
(
centers
.
size
(),
0
);
for
(
unsigned
long
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
unsigned
long
best_idx
=
0
;
double
best_dist
=
1e100
;
for
(
unsigned
long
j
=
0
;
j
<
centers
.
size
();
++
j
)
{
if
(
length
(
samples
[
i
]
-
centers
[
j
])
<
best_dist
)
{
best_dist
=
length
(
samples
[
i
]
-
centers
[
j
]);
best_idx
=
j
;
}
}
hits
[
best_idx
]
++
;
}
for
(
unsigned
long
i
=
0
;
i
<
hits
.
size
();
++
i
)
{
DLIB_TEST
(
hits
[
i
]
==
250
);
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment