Commit 50d1ff98 authored by Davis King

Made the sparse version of svd_fast() use multiple CPU cores.

parent 0cfef582
@@ -22,6 +22,8 @@
 #include "lapack/gesvd.h"
 #endif
 
+#include "../threads.h"
+
 #include <iostream>
 
 namespace dlib
@@ -644,13 +646,13 @@ convergence:
         Q.set_size(A.size(), l);
 
         // Compute Q = A*gaussian_randm()
-        for (long r = 0; r < Q.nr(); ++r)
+        parallel_for(0, Q.nr(), [&](long r)
         {
             for (long c = 0; c < Q.nc(); ++c)
             {
                 Q(r,c) = dot(A[r], gaussian_randm(std::numeric_limits<long>::max(), 1, c));
             }
-        }
+        });
 
         orthogonalize(Q);
@@ -658,39 +660,45 @@ convergence:
         // span of the most important singular vectors of A.
         if (q != 0)
         {
+            dlib::mutex mut;
             const unsigned long n = max_index_plus_one(A);
             for (unsigned long itr = 0; itr < q; ++itr)
             {
-                matrix<T,0,0,MM,L> Z(n, l);
+                matrix<T,0,0,MM> Z;
                 // Compute Z = trans(A)*Q
-                Z = 0;
-                for (unsigned long m = 0; m < A.size(); ++m)
+                parallel_for_blocked(0, A.size(), [&](long begin, long end)
                 {
-                    for (unsigned long r = 0; r < l; ++r)
+                    matrix<T,0,0,MM> Zlocal(n,l);
+                    Zlocal = 0;
+                    for (long m = begin; m < end; ++m)
                     {
-                        typename sparse_vector_type::const_iterator i;
-                        for (i = A[m].begin(); i != A[m].end(); ++i)
+                        for (unsigned long r = 0; r < l; ++r)
                         {
-                            const unsigned long c = i->first;
-                            const T val = i->second;
+                            for (auto& i : A[m])
+                            {
+                                const auto c = i.first;
+                                const auto val = i.second;
 
-                            Z(c,r) += Q(m,r)*val;
+                                Zlocal(c,r) += Q(m,r)*val;
+                            }
                         }
                     }
-                }
+                    auto_mutex lock(mut);
+                    Z += Zlocal;
+                },1);
 
                 Q.set_size(0,0); // free RAM
                 orthogonalize(Z);
 
                 // Compute Q = A*Z
                 Q.set_size(A.size(), l);
-                for (long r = 0; r < Q.nr(); ++r)
+                parallel_for(0, Q.nr(), [&](long r)
                 {
                     for (long c = 0; c < Q.nc(); ++c)
                     {
                         Q(r,c) = dot(A[r], colm(Z,c));
                     }
-                }
+                });
 
                 Z.set_size(0,0); // free RAM
                 orthogonalize(Q);
@@ -736,22 +744,28 @@ convergence:
         // is so that when we take its SVD later using svd3() it doesn't consume
         // a whole lot of RAM.  That is, we make sure the square matrix coming out
         // of svd3() has size lxl rather than the potentially much larger nxn.
-        matrix<T,0,0,MM,L> B(n,k);
-        B = 0;
-        for (unsigned long m = 0; m < A.size(); ++m)
+        matrix<T,0,0,MM> B;
+        dlib::mutex mut;
+        parallel_for_blocked(0, A.size(), [&](long begin, long end)
        {
-            for (unsigned long r = 0; r < k; ++r)
+            matrix<T,0,0,MM> Blocal(n,k);
+            Blocal = 0;
+            for (long m = begin; m < end; ++m)
             {
-                typename sparse_vector_type::const_iterator i;
-                for (i = A[m].begin(); i != A[m].end(); ++i)
+                for (unsigned long r = 0; r < k; ++r)
                 {
-                    const unsigned long c = i->first;
-                    const T val = i->second;
+                    for (auto& i : A[m])
+                    {
+                        const auto c = i.first;
+                        const auto val = i.second;
 
-                    B(c,r) += Q(m,r)*val;
+                        Blocal(c,r) += Q(m,r)*val;
+                    }
                 }
             }
-        }
+            auto_mutex lock(mut);
+            B += Blocal;
+        },1);
 
         svd3(B, v,w,u);
         u = Q*u;
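Both parallel_for_blocked() loops above use the same worker-local accumulation pattern: each worker fills a private Zlocal/Blocal matrix with no locking at all, then folds it into the shared result while holding the mutex. The trailing argument of 1 gives each thread a single contiguous block, so the locked merge runs only once per worker instead of once per nonzero entry. The hunks further below add a (disabled by default) speed test to the test suite. As a minimal, self-contained sketch of the accumulate-then-merge idea, here is a hypothetical parallel_sum() helper; the function name and data layout are illustrative only and not part of this commit:

#include <vector>
#include <dlib/matrix.h>
#include <dlib/threads.h>

// Hypothetical example: sum a large collection of column vectors in parallel
// (assumes data is non-empty).  Each worker accumulates into its own local
// total without locking, then merges it into the shared result under a mutex,
// mirroring the Zlocal/Blocal scheme used in svd_fast() above.
dlib::matrix<float,0,1> parallel_sum(const std::vector<dlib::matrix<float,0,1>>& data)
{
    dlib::matrix<float,0,1> total(data[0].size());
    total = 0;
    dlib::mutex mut;

    dlib::parallel_for_blocked(0, data.size(), [&](long begin, long end)
    {
        // thread-local accumulator, touched only by this worker
        dlib::matrix<float,0,1> local(data[0].size());
        local = 0;
        for (long i = begin; i < end; ++i)
            local += data[i];

        // merge once per block while holding the lock
        dlib::auto_mutex lock(mut);
        total += local;
    }, 1);

    return total;
}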
@@ -3,6 +3,7 @@
 #include <dlib/statistics.h>
 #include <dlib/sparse_vector.h>
+#include <dlib/timing.h>
 #include <map>
 
 #include "tester.h"
@@ -372,6 +373,57 @@ namespace
         test_svd_fast(1, 2, 1);
     }
 
+// ----------------------------------------------------------------------------------------
+
+    typedef std::vector<std::pair<unsigned int, float>> sv;
+
+    sv rand_sparse_vector()
+    {
+        static dlib::rand rnd;
+        sv v;
+        for (int i = 0; i < 50; ++i)
+            v.push_back(make_pair(rnd.get_integer(400000), rnd.get_random_gaussian()*100));
+
+        make_sparse_vector_inplace(v);
+        return v;
+    }
+
+    sv rand_basis_combo(const std::vector<sv>& basis)
+    {
+        static dlib::rand rnd;
+        sv result;
+        for (int i = 0; i < 5; ++i)
+        {
+            sv temp = basis[rnd.get_integer(basis.size())];
+            scale_by(temp, rnd.get_random_gaussian());
+            result = add(result,temp);
+        }
+        return result;
+    }
+
+    void big_sparse_speed_test()
+    {
+        cout << "making A" << endl;
+        std::vector<sv> basis;
+        for (int i = 0; i < 100; ++i)
+            basis.emplace_back(rand_sparse_vector());
+
+        std::vector<sv> A;
+        for (int i = 0; i < 500000; ++i)
+            A.emplace_back(rand_basis_combo(basis));
+        cout << "done making A" << endl;
+
+        matrix<float> u,v;
+        matrix<float,0,1> w;
+        {
+            timing::block aosijdf(0,"call it");
+            svd_fast(A, u,w,v, 100, 5);
+        }
+        timing::print();
+    }
+
 // ----------------------------------------------------------------------------------------
 
     class test_cca : public tester
@@ -386,6 +438,7 @@ namespace
         void perform_test (
         )
         {
+            //big_sparse_speed_test();
             for (int i = 0; i < 200; ++i)
             {
                 test_cca1();
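The public interface of the sparse svd_fast() is unchanged by this commit; only its internals are now multi-threaded. For reference, a small hypothetical usage example in the spirit of big_sparse_speed_test() above (the sizes, names, and printed output here are illustrative only, not from the commit):

#include <iostream>
#include <vector>
#include <dlib/matrix.h>
#include <dlib/sparse_vector.h>
#include <dlib/rand.h>

using namespace dlib;

int main()
{
    // Toy input: each element of A is a sparse vector of
    // (dimension index, value) pairs, as in the speed test above.
    typedef std::vector<std::pair<unsigned int, float>> sparse_vec;
    std::vector<sparse_vec> A;
    dlib::rand rnd;
    for (int r = 0; r < 1000; ++r)
    {
        sparse_vec sample;
        for (int i = 0; i < 20; ++i)
            sample.push_back(std::make_pair(rnd.get_integer(5000), rnd.get_random_float()));
        make_sparse_vector_inplace(sample);
        A.push_back(sample);
    }

    matrix<float> u, v;
    matrix<float,0,1> w;
    // Ask for a rank 50 factorization with 2 power iterations.  The work inside
    // this call is spread over the available CPU cores by this commit.
    svd_fast(A, u, w, v, 50, 2);

    std::cout << "largest singular value: " << max(w) << std::endl;
    return 0;
}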