Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
2c2f9556
Commit
2c2f9556
authored
Apr 28, 2013
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added cca() bindings
parent
8770498c
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
141 additions
and
2 deletions
+141
-2
CMakeLists.txt
tools/python/CMakeLists.txt
+1
-0
cca.cpp
tools/python/src/cca.cpp
+127
-0
dlib.cpp
tools/python/src/dlib.cpp
+7
-0
matrix.cpp
tools/python/src/matrix.cpp
+6
-2
No files found.
tools/python/CMakeLists.txt
View file @
2c2f9556
...
@@ -12,4 +12,5 @@ add_python_module(dlib
...
@@ -12,4 +12,5 @@ add_python_module(dlib
src/decision_functions.cpp
src/decision_functions.cpp
src/other.cpp
src/other.cpp
src/basic.cpp
src/basic.cpp
src/cca.cpp
)
)
tools/python/src/cca.cpp
0 → 100644
View file @
2c2f9556
#include <boost/python.hpp>
#include <boost/shared_ptr.hpp>
#include <dlib/statistics.h>
#include "pyassert.h"
#include <boost/python/args.hpp>
using
namespace
dlib
;
using
namespace
boost
::
python
;
// A sparse vector stored as a sequence of (index, value) pairs.
typedef std::vector<std::pair<unsigned long, double> > sparse_vect;
// Bundles the results of a cca() call into one object so that they can be
// handed back to Python together.
struct cca_outputs
{
    // Assigned from the value returned by cca().
    matrix<double, 0, 1> correlations;

    // Transformation matrices that cca() fills in through its output arguments.
    matrix<double> Ltrans;
    matrix<double> Rtrans;
};
// Wrapper around cca() for lists of sparse vectors.
//
// Checks the input preconditions (signalling failure through pyassert with
// the message "Invalid inputs"), runs cca(), and returns the correlations
// together with the two transformation matrices in a cca_outputs object.
cca_outputs _cca1 (
    const std::vector<sparse_vect>& L,
    const std::vector<sparse_vect>& R,
    unsigned long num_correlations,
    unsigned long extra_rank,
    unsigned long q,
    double regularization
)
{
    const bool inputs_are_valid = num_correlations > 0
                               && !L.empty()
                               && !R.empty()
                               && L.size() == R.size()
                               && regularization >= 0;
    pyassert(inputs_are_valid, "Invalid inputs");

    cca_outputs result;
    // cca() writes the transforms into result.Ltrans / result.Rtrans and
    // returns the correlations.
    result.correlations = cca(L, R,
                              result.Ltrans, result.Rtrans,
                              num_correlations, extra_rank, q, regularization);
    return result;
}
// ----------------------------------------------------------------------------------------
// Forwards to max_index_plus_one() for a sparse_vect argument.  bind_cca()
// exposes this function to Python under the name "max_index_plus_one".
unsigned long sparse_vector_max_index_plus_one (const sparse_vect& v)
{
    const unsigned long dimensionality = max_index_plus_one(v);
    return dimensionality;
}
// Returns trans(m)*v for a dense matrix m and a sparse vector v.
//
// Requires max_index_plus_one(v) <= m.nr(); otherwise pyassert is triggered
// with the message "Invalid Inputs".
matrix<double, 0, 1> apply_cca_transform (
    const matrix<double>& m,
    const sparse_vect& v
)
{
    const unsigned long vector_dims = max_index_plus_one(v);
    // Same conversion the mixed signed/unsigned comparison would apply
    // implicitly, spelled out explicitly.
    const unsigned long matrix_rows = static_cast<unsigned long>(m.nr());
    pyassert(vector_dims <= matrix_rows, "Invalid Inputs");

    return sparse_matrix_vector_multiply(trans(m), v);
}
// Registers the CCA related bindings with boost.python:
//   - the _cca_outputs class, exposing correlations, Ltrans and Rtrans
//   - max_index_plus_one(v)
//   - apply_cca_transform(m, v)
//   - cca(L, R, num_correlations, extra_rank=5, q=2, regularization=0)
//
// The docstrings are assembled from adjacent string literals, one per output
// line, so every line break is an explicit "\n".
void bind_cca()
{
    class_<cca_outputs>("_cca_outputs")
        .add_property("correlations", &cca_outputs::correlations)
        .add_property("Ltrans", &cca_outputs::Ltrans)
        .add_property("Rtrans", &cca_outputs::Rtrans);

    def("max_index_plus_one", sparse_vector_max_index_plus_one, arg("v"),
        "ensures\n"
        "    - returns the dimensionality of the given sparse vector. That is, returns a\n"
        "      number one larger than the maximum index value in the vector. If the vector\n"
        "      is empty then returns 0. "
    );

    // The description states that the transpose of m is applied, matching
    // what apply_cca_transform() actually computes.
    def("apply_cca_transform", apply_cca_transform, (arg("m"), arg("v")),
        "requires\n"
        "    - max_index_plus_one(v) <= m.nr()\n"
        "ensures\n"
        "    - returns trans(m)*v\n"
        "      (i.e. multiply the transpose of m by the vector v and return the result) "
    );

    def("cca", _cca1,
        (arg("L"), arg("R"), arg("num_correlations"),
         arg("extra_rank") = 5, arg("q") = 2, arg("regularization") = 0),
        "requires\n"
        "    - num_correlations > 0\n"
        "    - len(L) > 0\n"
        "    - len(R) > 0\n"
        "    - len(L) == len(R)\n"
        "    - regularization >= 0\n"
        "ensures\n"
        "    - This function performs a canonical correlation analysis between the vectors\n"
        "      in L and R. That is, it finds two transformation matrices, Ltrans and\n"
        "      Rtrans, such that row vectors in the transformed matrices L*Ltrans and\n"
        "      R*Rtrans are as correlated as possible (note that in this notation we\n"
        "      interpret L as a matrix with the input vectors in its rows). Note also that\n"
        "      this function tries to find transformations which produce num_correlations\n"
        "      dimensional output vectors.\n"
        "    - Note that you can easily apply the transformation to a vector using\n"
        "      apply_cca_transform(). So for example, like this:\n"
        "        - apply_cca_transform(Ltrans, some_sparse_vector)\n"
        "    - returns a structure containing the Ltrans and Rtrans transformation matrices\n"
        "      as well as the estimated correlations between elements of the transformed\n"
        "      vectors.\n"
        "    - No centering is applied to the L and R matrices. Therefore, if you want a\n"
        "      CCA relative to the centered vectors then you must apply centering yourself\n"
        "      before calling cca().\n"
        "    - This function works with reduced rank approximations of the L and R matrices.\n"
        "      This makes it fast when working with large matrices. In particular, we use\n"
        "      the dlib::svd_fast() routine to find reduced rank representations of the input\n"
        "      matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q)\n"
        "      and similarly for R. This means that you can use the extra_rank and q\n"
        "      arguments to cca() to influence the accuracy of the reduced rank\n"
        "      approximation. However, the default values should work fine for most\n"
        "      problems.\n"
        "    - This function performs the ridge regression version of Canonical Correlation\n"
        "      Analysis when regularization is set to a value > 0. In particular, larger\n"
        "      values indicate the solution should be more heavily regularized. This can be\n"
        "      useful when the dimensionality of the data is larger than the number of\n"
        "      samples.\n"
        "    - A good discussion of CCA can be found in the paper \"Canonical Correlation\n"
        "      Analysis\" by David Weenink. In particular, this function is implemented\n"
        "      using equations 29 and 30 from his paper. We also use the idea of doing CCA\n"
        "      on a reduced rank approximation of L and R as suggested by Paramveer S.\n"
        "      Dhillon in his paper \"Two Step CCA: A new spectral method for estimating\n"
        "      vector models of words\". "
    );
}
tools/python/src/dlib.cpp
View file @
2c2f9556
...
@@ -8,10 +8,15 @@ void bind_decision_functions();
...
@@ -8,10 +8,15 @@ void bind_decision_functions();
void
bind_basic_types
();
void
bind_basic_types
();
void
bind_other
();
void
bind_other
();
void
bind_svm_rank_trainer
();
void
bind_svm_rank_trainer
();
void
bind_cca
();
BOOST_PYTHON_MODULE
(
dlib
)
BOOST_PYTHON_MODULE
(
dlib
)
{
{
// Disable printing of the C++ function signature in the python __doc__ string
// since it is full of huge amounts of template clutter.
boost
::
python
::
docstring_options
options
(
true
,
true
,
false
);
bind_matrix
();
bind_matrix
();
bind_vector
();
bind_vector
();
bind_svm_c_trainer
();
bind_svm_c_trainer
();
...
@@ -19,4 +24,6 @@ BOOST_PYTHON_MODULE(dlib)
...
@@ -19,4 +24,6 @@ BOOST_PYTHON_MODULE(dlib)
bind_basic_types
();
bind_basic_types
();
bind_other
();
bind_other
();
bind_svm_rank_trainer
();
bind_svm_rank_trainer
();
bind_cca
();
}
}
tools/python/src/matrix.cpp
View file @
2c2f9556
...
@@ -4,11 +4,13 @@
...
@@ -4,11 +4,13 @@
#include <dlib/matrix.h>
#include <dlib/matrix.h>
#include <dlib/string.h>
#include <dlib/string.h>
#include "serialize_pickle.h"
#include "serialize_pickle.h"
#include <boost/python/args.hpp>
using
namespace
dlib
;
using
namespace
dlib
;
using
namespace
std
;
using
namespace
boost
::
python
;
using
namespace
boost
::
python
;
using
std
::
string
;
using
std
::
ostringstream
;
void
matrix_set_size
(
matrix
<
double
>&
m
,
long
nr
,
long
nc
)
void
matrix_set_size
(
matrix
<
double
>&
m
,
long
nr
,
long
nc
)
...
@@ -159,10 +161,12 @@ void bind_matrix()
...
@@ -159,10 +161,12 @@ void bind_matrix()
class_
<
matrix
<
double
>
>
(
"matrix"
,
init
<>
())
class_
<
matrix
<
double
>
>
(
"matrix"
,
init
<>
())
.
def
(
"__init__"
,
make_constructor
(
&
make_matrix_from_size
))
.
def
(
"__init__"
,
make_constructor
(
&
make_matrix_from_size
))
.
def
(
"set_size"
,
&
matrix_set_size
)
.
def
(
"set_size"
,
&
matrix_set_size
,
(
arg
(
"rows"
),
arg
(
"cols"
)),
"Set the size of the matrix to the given number of rows and columns."
)
.
def
(
"__init__"
,
make_constructor
(
&
from_object
))
.
def
(
"__init__"
,
make_constructor
(
&
from_object
))
.
def
(
"__repr__"
,
&
matrix_double__repr__
)
.
def
(
"__repr__"
,
&
matrix_double__repr__
)
.
def
(
"__str__"
,
&
matrix_double__str__
)
.
def
(
"__str__"
,
&
matrix_double__str__
)
.
def
(
"nr"
,
&
matrix
<
double
>::
nr
,
"Return the number of rows in the matrix."
)
.
def
(
"nc"
,
&
matrix
<
double
>::
nc
,
"Return the number of columns in the matrix."
)
.
def
(
"__len__"
,
&
matrix_double__len__
)
.
def
(
"__len__"
,
&
matrix_double__len__
)
.
def
(
"__getitem__"
,
&
matrix_double__getitem__
,
with_custodian_and_ward_postcall
<
0
,
1
>
())
.
def
(
"__getitem__"
,
&
matrix_double__getitem__
,
with_custodian_and_ward_postcall
<
0
,
1
>
())
.
add_property
(
"shape"
,
&
get_matrix_size
)
.
add_property
(
"shape"
,
&
get_matrix_size
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment