Commit e4abab37 authored by Davis King

Made extract_fhog_features() a little faster by adding more SIMD instructions.

parent 26eb8143
...@@ -522,52 +522,73 @@ namespace dlib ...@@ -522,52 +522,73 @@ namespace dlib
const int yy = y+padding_rows_offset; const int yy = y+padding_rows_offset;
for (int x = 0; x < hog_nc; x++) for (int x = 0; x < hog_nc; x++)
{ {
const double nn1 = 0.2*std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps); const simd4f z1(norm[y+1][x+1],
const double nn2 = 0.2*std::sqrt(norm[y][x+1] + norm[y][x+2] + norm[y+1][x+1] + norm[y+1][x+2] + eps); norm[y][x+1],
const double nn3 = 0.2*std::sqrt(norm[y+1][x] + norm[y+1][x+1] + norm[y+2][x] + norm[y+2][x+1] + eps); norm[y+1][x],
const double nn4 = 0.2*std::sqrt(norm[y][x] + norm[y][x+1] + norm[y+1][x] + norm[y+1][x+1] + eps); norm[y][x]);
const double n1 = 0.1 / nn1;
const double n2 = 0.1 / nn2; const simd4f z2(norm[y+1][x+2],
const double n3 = 0.1 / nn3; norm[y][x+2],
const double n4 = 0.1 / nn4; norm[y+1][x+1],
norm[y][x+1]);
double t1 = 0;
double t2 = 0; const simd4f z3(norm[y+2][x+1],
double t3 = 0; norm[y+1][x+1],
double t4 = 0; norm[y+2][x],
norm[y+1][x]);
const simd4f z4(norm[y+2][x+2],
norm[y+1][x+2],
norm[y+2][x+1],
norm[y+1][x+1]);
const simd4f nn = 0.2*sqrt(z1+z2+z3+z4+eps);
const simd4f n = 0.1/nn;
simd4f t = 0;
const int xx = x+padding_cols_offset; const int xx = x+padding_cols_offset;
// contrast-sensitive features // contrast-sensitive features
for (int o = 0; o < 18; o++) for (int o = 0; o < 18; o+=3)
{ {
double h1 = std::min<double>(hist[y+1+1][x+1+1](o) , nn1)*n1; simd4f temp0(hist[y+1+1][x+1+1](o));
double h2 = std::min<double>(hist[y+1+1][x+1+1](o) , nn2)*n2; simd4f temp1(hist[y+1+1][x+1+1](o+1));
double h3 = std::min<double>(hist[y+1+1][x+1+1](o) , nn3)*n3; simd4f temp2(hist[y+1+1][x+1+1](o+2));
double h4 = std::min<double>(hist[y+1+1][x+1+1](o) , nn4)*n4; simd4f h0 = min(temp0,nn)*n;
set_hog(hog,o,xx,yy, (h1 + h2 + h3 + h4)); simd4f h1 = min(temp1,nn)*n;
t1 += h1; simd4f h2 = min(temp2,nn)*n;
t2 += h2; set_hog(hog,o,xx,yy, sum(h0));
t3 += h3; set_hog(hog,o+1,xx,yy, sum(h1));
t4 += h4; set_hog(hog,o+2,xx,yy, sum(h2));
t += h0+h1+h2;
} }
t *= 2*0.2357;
// contrast-insensitive features // contrast-insensitive features
for (int o = 0; o < 9; o++) for (int o = 0; o < 9; o+=3)
{ {
double sum = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9); simd4f temp0 = hist[y+1+1][x+1+1](o) + hist[y+1+1][x+1+1](o+9);
double h1 = std::min(sum , nn1)*n1; simd4f temp1 = hist[y+1+1][x+1+1](o+1) + hist[y+1+1][x+1+1](o+9+1);
double h2 = std::min(sum , nn2)*n2; simd4f temp2 = hist[y+1+1][x+1+1](o+2) + hist[y+1+1][x+1+1](o+9+2);
double h3 = std::min(sum , nn3)*n3; simd4f h0 = min(temp0,nn)*n;
double h4 = std::min(sum , nn4)*n4; simd4f h1 = min(temp1,nn)*n;
set_hog(hog,o+18,xx,yy, (h1 + h2 + h3 + h4)); simd4f h2 = min(temp2,nn)*n;
set_hog(hog,o+18,xx,yy, sum(h0));
set_hog(hog,o+18+1,xx,yy, sum(h1));
set_hog(hog,o+18+2,xx,yy, sum(h2));
} }
float temp[4];
t.store(temp);
// texture features // texture features
set_hog(hog,27,xx,yy, 2*0.2357 * t1); set_hog(hog,27,xx,yy, temp[0]);
set_hog(hog,28,xx,yy, 2*0.2357 * t2); set_hog(hog,28,xx,yy, temp[1]);
set_hog(hog,29,xx,yy, 2*0.2357 * t3); set_hog(hog,29,xx,yy, temp[2]);
set_hog(hog,30,xx,yy, 2*0.2357 * t4); set_hog(hog,30,xx,yy, temp[3]);
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment