Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
c3a9a7c4
Commit
c3a9a7c4
authored
Apr 11, 2016
by
Davis E. King
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #94 from e-fominov/fhog-simd8f
Added simd8 usage on FHOG features computation
parents
7d7c932f
446a06a0
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
282 additions
and
70 deletions
+282
-70
fhog.h
dlib/image_transforms/fhog.h
+282
-70
No files found.
dlib/image_transforms/fhog.h
View file @
c3a9a7c4
...
@@ -144,6 +144,139 @@ namespace dlib
...
@@ -144,6 +144,139 @@ namespace dlib
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// RGB overload of get_gradient() operating on 8 pixels at once.
//
// For the 8 horizontally adjacent pixels img[r][c] .. img[r][c+7] this computes, per
// color channel, the horizontal difference (pixel to the right minus pixel to the
// left) and the vertical difference (pixel below minus pixel above), together with
// the squared length dx*dx + dy*dy.  The three channels are then merged lane by lane
// with two compare/select steps: red against green first, the winner against blue.
//
// Outputs:
//   grad_x - selected horizontal difference for each of the 8 lanes
//   grad_y - selected vertical difference for each of the 8 lanes
//   len    - selected squared length (no square root is taken here)
//
// The function reads rows r-1 .. r+1 and columns c-1 .. c+8 of img; no bounds
// checking is performed in this function.
// NOTE(review): select(mask,a,b) is assumed to yield a where the mask is set and b
// elsewhere, which makes this "keep the channel with the largest squared length";
// confirm against the simd8i definition.
template <typename image_type>
inline typename dlib::enable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient (
    const int r,
    const int c,
    const image_type& img,
    simd8f& grad_x,
    simd8f& grad_y,
    simd8f& len
)
{
    // ---------------- red channel ----------------
    simd8i red_west (static_cast<int>(img[r][c-1].red), static_cast<int>(img[r][c  ].red),
                     static_cast<int>(img[r][c+1].red), static_cast<int>(img[r][c+2].red),
                     static_cast<int>(img[r][c+3].red), static_cast<int>(img[r][c+4].red),
                     static_cast<int>(img[r][c+5].red), static_cast<int>(img[r][c+6].red));
    simd8i red_east (static_cast<int>(img[r][c+1].red), static_cast<int>(img[r][c+2].red),
                     static_cast<int>(img[r][c+3].red), static_cast<int>(img[r][c+4].red),
                     static_cast<int>(img[r][c+5].red), static_cast<int>(img[r][c+6].red),
                     static_cast<int>(img[r][c+7].red), static_cast<int>(img[r][c+8].red));
    simd8i red_north(static_cast<int>(img[r-1][c  ].red), static_cast<int>(img[r-1][c+1].red),
                     static_cast<int>(img[r-1][c+2].red), static_cast<int>(img[r-1][c+3].red),
                     static_cast<int>(img[r-1][c+4].red), static_cast<int>(img[r-1][c+5].red),
                     static_cast<int>(img[r-1][c+6].red), static_cast<int>(img[r-1][c+7].red));
    simd8i red_south(static_cast<int>(img[r+1][c  ].red), static_cast<int>(img[r+1][c+1].red),
                     static_cast<int>(img[r+1][c+2].red), static_cast<int>(img[r+1][c+3].red),
                     static_cast<int>(img[r+1][c+4].red), static_cast<int>(img[r+1][c+5].red),
                     static_cast<int>(img[r+1][c+6].red), static_cast<int>(img[r+1][c+7].red));

    simd8i red_dx = red_east - red_west;
    simd8i red_dy = red_south - red_north;
    simd8i red_sq = red_dx*red_dx + red_dy*red_dy;

    // ---------------- green channel ----------------
    simd8i green_west (static_cast<int>(img[r][c-1].green), static_cast<int>(img[r][c  ].green),
                       static_cast<int>(img[r][c+1].green), static_cast<int>(img[r][c+2].green),
                       static_cast<int>(img[r][c+3].green), static_cast<int>(img[r][c+4].green),
                       static_cast<int>(img[r][c+5].green), static_cast<int>(img[r][c+6].green));
    simd8i green_east (static_cast<int>(img[r][c+1].green), static_cast<int>(img[r][c+2].green),
                       static_cast<int>(img[r][c+3].green), static_cast<int>(img[r][c+4].green),
                       static_cast<int>(img[r][c+5].green), static_cast<int>(img[r][c+6].green),
                       static_cast<int>(img[r][c+7].green), static_cast<int>(img[r][c+8].green));
    simd8i green_north(static_cast<int>(img[r-1][c  ].green), static_cast<int>(img[r-1][c+1].green),
                       static_cast<int>(img[r-1][c+2].green), static_cast<int>(img[r-1][c+3].green),
                       static_cast<int>(img[r-1][c+4].green), static_cast<int>(img[r-1][c+5].green),
                       static_cast<int>(img[r-1][c+6].green), static_cast<int>(img[r-1][c+7].green));
    simd8i green_south(static_cast<int>(img[r+1][c  ].green), static_cast<int>(img[r+1][c+1].green),
                       static_cast<int>(img[r+1][c+2].green), static_cast<int>(img[r+1][c+3].green),
                       static_cast<int>(img[r+1][c+4].green), static_cast<int>(img[r+1][c+5].green),
                       static_cast<int>(img[r+1][c+6].green), static_cast<int>(img[r+1][c+7].green));

    simd8i green_dx = green_east - green_west;
    simd8i green_dy = green_south - green_north;
    simd8i green_sq = green_dx*green_dx + green_dy*green_dy;

    // ---------------- blue channel ----------------
    simd8i blue_west (static_cast<int>(img[r][c-1].blue), static_cast<int>(img[r][c  ].blue),
                      static_cast<int>(img[r][c+1].blue), static_cast<int>(img[r][c+2].blue),
                      static_cast<int>(img[r][c+3].blue), static_cast<int>(img[r][c+4].blue),
                      static_cast<int>(img[r][c+5].blue), static_cast<int>(img[r][c+6].blue));
    simd8i blue_east (static_cast<int>(img[r][c+1].blue), static_cast<int>(img[r][c+2].blue),
                      static_cast<int>(img[r][c+3].blue), static_cast<int>(img[r][c+4].blue),
                      static_cast<int>(img[r][c+5].blue), static_cast<int>(img[r][c+6].blue),
                      static_cast<int>(img[r][c+7].blue), static_cast<int>(img[r][c+8].blue));
    simd8i blue_north(static_cast<int>(img[r-1][c  ].blue), static_cast<int>(img[r-1][c+1].blue),
                      static_cast<int>(img[r-1][c+2].blue), static_cast<int>(img[r-1][c+3].blue),
                      static_cast<int>(img[r-1][c+4].blue), static_cast<int>(img[r-1][c+5].blue),
                      static_cast<int>(img[r-1][c+6].blue), static_cast<int>(img[r-1][c+7].blue));
    simd8i blue_south(static_cast<int>(img[r+1][c  ].blue), static_cast<int>(img[r+1][c+1].blue),
                      static_cast<int>(img[r+1][c+2].blue), static_cast<int>(img[r+1][c+3].blue),
                      static_cast<int>(img[r+1][c+4].blue), static_cast<int>(img[r+1][c+5].blue),
                      static_cast<int>(img[r+1][c+6].blue), static_cast<int>(img[r+1][c+7].blue));

    simd8i blue_dx = blue_east - blue_west;
    simd8i blue_dy = blue_south - blue_north;
    simd8i blue_sq = blue_dx*blue_dx + blue_dy*blue_dy;

    // Merge red and green: red is taken only where it is strictly larger.
    simd8i red_wins = red_sq > green_sq;
    simd8i best_dx  = select(red_wins, red_dx, green_dx);
    simd8i best_dy  = select(red_wins, red_dy, green_dy);
    simd8i best_sq  = select(red_wins, red_sq, green_sq);

    // Merge the red/green winner with blue; the integer results are converted to
    // simd8f by the assignments to the output arguments.
    simd8i keep_best = best_sq > blue_sq;
    grad_x = select(keep_best, best_dx, blue_dx);
    grad_y = select(keep_best, best_dy, blue_dy);
    len    = select(keep_best, best_sq, blue_sq);
}
// ------------------------------------------------------------------------------------
template
<
typename
image_type
,
typename
T
>
template
<
typename
image_type
,
typename
T
>
inline
typename
dlib
::
disable_if_c
<
pixel_traits
<
typename
image_type
::
pixel_type
>::
rgb
>::
type
get_gradient
(
inline
typename
dlib
::
disable_if_c
<
pixel_traits
<
typename
image_type
::
pixel_type
>::
rgb
>::
type
get_gradient
(
const
int
r
,
const
int
r
,
...
@@ -194,6 +327,58 @@ namespace dlib
...
@@ -194,6 +327,58 @@ namespace dlib
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// Non-RGB overload of get_gradient() operating on 8 pixels at once.
//
// For the 8 horizontally adjacent pixels img[r][c] .. img[r][c+7] the pixel
// intensities (via get_pixel_intensity(), truncated to int) of the four direct
// neighbors are gathered and differenced:
//   grad_x - intensity to the right minus intensity to the left
//   grad_y - intensity below minus intensity above
//   len    - grad_x*grad_x + grad_y*grad_y, i.e. the squared length (no square
//            root is taken here)
//
// The function reads rows r-1 .. r+1 and columns c-1 .. c+8 of img; no bounds
// checking is performed in this function.
template <typename image_type>
inline typename dlib::disable_if_c<pixel_traits<typename image_type::pixel_type>::rgb>::type get_gradient (
    int r,
    int c,
    const image_type& img,
    simd8f& grad_x,
    simd8f& grad_y,
    simd8f& len
)
{
    // Horizontal neighbors: lane k holds column c+k-1 (west) and c+k+1 (east).
    simd8i west (static_cast<int>(get_pixel_intensity(img[r][c-1])),
                 static_cast<int>(get_pixel_intensity(img[r][c  ])),
                 static_cast<int>(get_pixel_intensity(img[r][c+1])),
                 static_cast<int>(get_pixel_intensity(img[r][c+2])),
                 static_cast<int>(get_pixel_intensity(img[r][c+3])),
                 static_cast<int>(get_pixel_intensity(img[r][c+4])),
                 static_cast<int>(get_pixel_intensity(img[r][c+5])),
                 static_cast<int>(get_pixel_intensity(img[r][c+6])));
    simd8i east (static_cast<int>(get_pixel_intensity(img[r][c+1])),
                 static_cast<int>(get_pixel_intensity(img[r][c+2])),
                 static_cast<int>(get_pixel_intensity(img[r][c+3])),
                 static_cast<int>(get_pixel_intensity(img[r][c+4])),
                 static_cast<int>(get_pixel_intensity(img[r][c+5])),
                 static_cast<int>(get_pixel_intensity(img[r][c+6])),
                 static_cast<int>(get_pixel_intensity(img[r][c+7])),
                 static_cast<int>(get_pixel_intensity(img[r][c+8])));

    // Vertical neighbors: lane k holds column c+k of the row above / below.
    simd8i north(static_cast<int>(get_pixel_intensity(img[r-1][c  ])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+1])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+2])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+3])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+4])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+5])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+6])),
                 static_cast<int>(get_pixel_intensity(img[r-1][c+7])));
    simd8i south(static_cast<int>(get_pixel_intensity(img[r+1][c  ])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+1])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+2])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+3])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+4])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+5])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+6])),
                 static_cast<int>(get_pixel_intensity(img[r+1][c+7])));

    // The differences are formed on integers and converted to simd8f by the
    // assignments; the squared length is then evaluated on the float outputs.
    grad_x = east - west;
    grad_y = south - north;
    len = (grad_x*grad_x + grad_y*grad_y);
}
// ------------------------------------------------------------------------------------
template
<
typename
T
,
typename
mm1
,
typename
mm2
>
template
<
typename
T
,
typename
mm1
,
typename
mm2
>
inline
void
set_hog
(
inline
void
set_hog
(
dlib
::
array
<
array2d
<
T
,
mm1
>
,
mm2
>&
hog
,
dlib
::
array
<
array2d
<
T
,
mm1
>
,
mm2
>&
hog
,
...
@@ -376,43 +561,50 @@ namespace dlib
...
@@ -376,43 +561,50 @@ namespace dlib
for
(
int
y
=
1
;
y
<
visible_nr
;
y
++
)
for
(
int
y
=
1
;
y
<
visible_nr
;
y
++
)
{
{
int
x
;
int
x
;
for
(
x
=
1
;
x
<
visible_nc
-
3
;
x
+=
4
)
for
(
x
=
1
;
x
<
visible_nc
-
7
;
x
+=
8
)
{
{
// v will be the length of the gradient vectors.
// v will be the length of the gradient vectors.
simd
4
f
grad_x
,
grad_y
,
v
;
simd
8
f
grad_x
,
grad_y
,
v
;
get_gradient
(
y
,
x
,
img
,
grad_x
,
grad_y
,
v
);
get_gradient
(
y
,
x
,
img
,
grad_x
,
grad_y
,
v
);
float
_vv
[
4
];
float
_vv
[
8
];
v
.
store
(
_vv
);
v
.
store
(
_vv
);
// Now snap the gradient to one of 18 orientations
// Now snap the gradient to one of 18 orientations
simd
4
f
best_dot
=
0
;
simd
8
f
best_dot
=
0
;
simd
4
f
best_o
=
0
;
simd
8
f
best_o
=
0
;
for
(
int
o
=
0
;
o
<
9
;
o
++
)
for
(
int
o
=
0
;
o
<
9
;
o
++
)
{
{
simd
4
f
dot
=
grad_x
*
directions
[
o
](
0
)
+
grad_y
*
directions
[
o
](
1
);
simd
8
f
dot
=
grad_x
*
directions
[
o
](
0
)
+
grad_y
*
directions
[
o
](
1
);
simd
4
f_bool
cmp
=
dot
>
best_dot
;
simd
8
f_bool
cmp
=
dot
>
best_dot
;
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
dot
*=
-
1
;
dot
*=
-
1
;
best_o
=
select
(
cmp
,
o
,
best_o
);
best_o
=
select
(
cmp
,
o
,
best_o
);
cmp
=
dot
>
best_dot
;
cmp
=
dot
>
best_dot
;
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
best_o
=
select
(
cmp
,
o
+
9
,
best_o
);
best_o
=
select
(
cmp
,
o
+
9
,
best_o
);
}
}
int32
_best_o
[
4
];
simd4i
(
best_o
).
store
(
_best_o
);
int32
_best_o
[
8
];
simd8i
(
best_o
).
store
(
_best_o
);
norm
[
y
][
x
+
0
]
=
_vv
[
0
];
norm
[
y
][
x
+
1
]
=
_vv
[
1
];
norm
[
y
][
x
+
2
]
=
_vv
[
2
];
norm
[
y
][
x
+
3
]
=
_vv
[
3
];
angle
[
y
][
x
+
0
]
=
_best_o
[
0
];
norm
[
y
][
x
+
0
]
=
_vv
[
0
];
angle
[
y
][
x
+
1
]
=
_best_o
[
1
];
norm
[
y
][
x
+
1
]
=
_vv
[
1
];
angle
[
y
][
x
+
2
]
=
_best_o
[
2
];
norm
[
y
][
x
+
2
]
=
_vv
[
2
];
angle
[
y
][
x
+
3
]
=
_best_o
[
3
];
norm
[
y
][
x
+
3
]
=
_vv
[
3
];
norm
[
y
][
x
+
4
]
=
_vv
[
4
];
norm
[
y
][
x
+
5
]
=
_vv
[
5
];
norm
[
y
][
x
+
6
]
=
_vv
[
6
];
norm
[
y
][
x
+
7
]
=
_vv
[
7
];
angle
[
y
][
x
+
0
]
=
_best_o
[
0
];
angle
[
y
][
x
+
1
]
=
_best_o
[
1
];
angle
[
y
][
x
+
2
]
=
_best_o
[
2
];
angle
[
y
][
x
+
3
]
=
_best_o
[
3
];
angle
[
y
][
x
+
4
]
=
_best_o
[
4
];
angle
[
y
][
x
+
5
]
=
_best_o
[
5
];
angle
[
y
][
x
+
6
]
=
_best_o
[
6
];
angle
[
y
][
x
+
7
]
=
_best_o
[
7
];
}
}
// Now process the right columns that don't fit into simd registers.
// Now process the right columns that don't fit into simd registers.
for
(;
x
<
visible_nc
;
x
++
)
for
(;
x
<
visible_nc
;
x
++
)
...
@@ -634,37 +826,37 @@ namespace dlib
...
@@ -634,37 +826,37 @@ namespace dlib
const
float
vy0
=
yp
-
iyp
;
const
float
vy0
=
yp
-
iyp
;
const
float
vy1
=
1
.
0
-
vy0
;
const
float
vy1
=
1
.
0
-
vy0
;
int
x
;
int
x
;
for
(
x
=
1
;
x
<
visible_nc
-
3
;
x
+=
4
)
for
(
x
=
1
;
x
<
visible_nc
-
7
;
x
+=
8
)
{
{
simd
4f
xx
(
x
,
x
+
1
,
x
+
2
,
x
+
3
);
simd
8f
xx
(
x
,
x
+
1
,
x
+
2
,
x
+
3
,
x
+
4
,
x
+
5
,
x
+
6
,
x
+
7
);
// v will be the length of the gradient vectors.
// v will be the length of the gradient vectors.
simd
4
f
grad_x
,
grad_y
,
v
;
simd
8
f
grad_x
,
grad_y
,
v
;
get_gradient
(
y
,
x
,
img
,
grad_x
,
grad_y
,
v
);
get_gradient
(
y
,
x
,
img
,
grad_x
,
grad_y
,
v
);
// We will use bilinear interpolation to add into the histogram bins.
// We will use bilinear interpolation to add into the histogram bins.
// So first we precompute the values needed to determine how much each
// So first we precompute the values needed to determine how much each
// pixel votes into each bin.
// pixel votes into each bin.
simd
4f
xp
=
(
xx
+
0
.
5
)
/
(
float
)
cell_size
+
0
.
5
;
simd
8f
xp
=
(
xx
+
0
.
5
)
/
(
float
)
cell_size
+
0
.
5
;
simd
4i
ixp
=
simd4
i
(
xp
);
simd
8i
ixp
=
simd8
i
(
xp
);
simd
4f
vx0
=
xp
-
ixp
;
simd
8f
vx0
=
xp
-
ixp
;
simd
4f
vx1
=
1
.
0
f
-
vx0
;
simd
8f
vx1
=
1
.
0
f
-
vx0
;
v
=
sqrt
(
v
);
v
=
sqrt
(
v
);
// Now snap the gradient to one of 18 orientations
// Now snap the gradient to one of 18 orientations
simd
4
f
best_dot
=
0
;
simd
8
f
best_dot
=
0
;
simd
4
f
best_o
=
0
;
simd
8
f
best_o
=
0
;
for
(
int
o
=
0
;
o
<
9
;
o
++
)
for
(
int
o
=
0
;
o
<
9
;
o
++
)
{
{
simd
4
f
dot
=
grad_x
*
directions
[
o
](
0
)
+
grad_y
*
directions
[
o
](
1
);
simd
8
f
dot
=
grad_x
*
directions
[
o
](
0
)
+
grad_y
*
directions
[
o
](
1
);
simd
4
f_bool
cmp
=
dot
>
best_dot
;
simd
8
f_bool
cmp
=
dot
>
best_dot
;
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
dot
*=
-
1
;
dot
*=
-
1
;
best_o
=
select
(
cmp
,
o
,
best_o
);
best_o
=
select
(
cmp
,
o
,
best_o
);
cmp
=
dot
>
best_dot
;
cmp
=
dot
>
best_dot
;
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
best_dot
=
select
(
cmp
,
dot
,
best_dot
);
best_o
=
select
(
cmp
,
o
+
9
,
best_o
);
best_o
=
select
(
cmp
,
o
+
9
,
best_o
);
}
}
...
@@ -673,37 +865,57 @@ namespace dlib
...
@@ -673,37 +865,57 @@ namespace dlib
vx1
*=
v
;
vx1
*=
v
;
vx0
*=
v
;
vx0
*=
v
;
// The amounts for each bin
// The amounts for each bin
simd4f
v11
=
vy1
*
vx1
;
simd8f
v11
=
vy1
*
vx1
;
simd4f
v01
=
vy0
*
vx1
;
simd8f
v01
=
vy0
*
vx1
;
simd4f
v10
=
vy1
*
vx0
;
simd8f
v10
=
vy1
*
vx0
;
simd4f
v00
=
vy0
*
vx0
;
simd8f
v00
=
vy0
*
vx0
;
int32
_best_o
[
4
];
simd4i
(
best_o
).
store
(
_best_o
);
int32
_best_o
[
8
];
simd8i
(
best_o
).
store
(
_best_o
);
int32
_ixp
[
4
];
ixp
.
store
(
_ixp
);
int32
_ixp
[
8
];
ixp
.
store
(
_ixp
);
float
_v11
[
4
];
v11
.
store
(
_v11
);
float
_v11
[
8
];
v11
.
store
(
_v11
);
float
_v01
[
4
];
v01
.
store
(
_v01
);
float
_v01
[
8
];
v01
.
store
(
_v01
);
float
_v10
[
4
];
v10
.
store
(
_v10
);
float
_v10
[
8
];
v10
.
store
(
_v10
);
float
_v00
[
4
];
v00
.
store
(
_v00
);
float
_v00
[
8
];
v00
.
store
(
_v00
);
hist
[
iyp
+
1
]
[
_ixp
[
0
]
](
_best_o
[
0
])
+=
_v11
[
0
];
hist
[
iyp
+
1
][
_ixp
[
0
]](
_best_o
[
0
])
+=
_v11
[
0
];
hist
[
iyp
+
1
+
1
][
_ixp
[
0
]
](
_best_o
[
0
])
+=
_v01
[
0
];
hist
[
iyp
+
1
+
1
][
_ixp
[
0
]](
_best_o
[
0
])
+=
_v01
[
0
];
hist
[
iyp
+
1
]
[
_ixp
[
0
]
+
1
](
_best_o
[
0
])
+=
_v10
[
0
];
hist
[
iyp
+
1
][
_ixp
[
0
]
+
1
](
_best_o
[
0
])
+=
_v10
[
0
];
hist
[
iyp
+
1
+
1
][
_ixp
[
0
]
+
1
](
_best_o
[
0
])
+=
_v00
[
0
];
hist
[
iyp
+
1
+
1
][
_ixp
[
0
]
+
1
](
_best_o
[
0
])
+=
_v00
[
0
];
hist
[
iyp
+
1
]
[
_ixp
[
1
]
](
_best_o
[
1
])
+=
_v11
[
1
];
hist
[
iyp
+
1
][
_ixp
[
1
]](
_best_o
[
1
])
+=
_v11
[
1
];
hist
[
iyp
+
1
+
1
][
_ixp
[
1
]
](
_best_o
[
1
])
+=
_v01
[
1
];
hist
[
iyp
+
1
+
1
][
_ixp
[
1
]](
_best_o
[
1
])
+=
_v01
[
1
];
hist
[
iyp
+
1
]
[
_ixp
[
1
]
+
1
](
_best_o
[
1
])
+=
_v10
[
1
];
hist
[
iyp
+
1
][
_ixp
[
1
]
+
1
](
_best_o
[
1
])
+=
_v10
[
1
];
hist
[
iyp
+
1
+
1
][
_ixp
[
1
]
+
1
](
_best_o
[
1
])
+=
_v00
[
1
];
hist
[
iyp
+
1
+
1
][
_ixp
[
1
]
+
1
](
_best_o
[
1
])
+=
_v00
[
1
];
hist
[
iyp
+
1
]
[
_ixp
[
2
]
](
_best_o
[
2
])
+=
_v11
[
2
];
hist
[
iyp
+
1
][
_ixp
[
2
]](
_best_o
[
2
])
+=
_v11
[
2
];
hist
[
iyp
+
1
+
1
][
_ixp
[
2
]
](
_best_o
[
2
])
+=
_v01
[
2
];
hist
[
iyp
+
1
+
1
][
_ixp
[
2
]](
_best_o
[
2
])
+=
_v01
[
2
];
hist
[
iyp
+
1
]
[
_ixp
[
2
]
+
1
](
_best_o
[
2
])
+=
_v10
[
2
];
hist
[
iyp
+
1
][
_ixp
[
2
]
+
1
](
_best_o
[
2
])
+=
_v10
[
2
];
hist
[
iyp
+
1
+
1
][
_ixp
[
2
]
+
1
](
_best_o
[
2
])
+=
_v00
[
2
];
hist
[
iyp
+
1
+
1
][
_ixp
[
2
]
+
1
](
_best_o
[
2
])
+=
_v00
[
2
];
hist
[
iyp
+
1
]
[
_ixp
[
3
]
](
_best_o
[
3
])
+=
_v11
[
3
];
hist
[
iyp
+
1
][
_ixp
[
3
]](
_best_o
[
3
])
+=
_v11
[
3
];
hist
[
iyp
+
1
+
1
][
_ixp
[
3
]
](
_best_o
[
3
])
+=
_v01
[
3
];
hist
[
iyp
+
1
+
1
][
_ixp
[
3
]](
_best_o
[
3
])
+=
_v01
[
3
];
hist
[
iyp
+
1
]
[
_ixp
[
3
]
+
1
](
_best_o
[
3
])
+=
_v10
[
3
];
hist
[
iyp
+
1
][
_ixp
[
3
]
+
1
](
_best_o
[
3
])
+=
_v10
[
3
];
hist
[
iyp
+
1
+
1
][
_ixp
[
3
]
+
1
](
_best_o
[
3
])
+=
_v00
[
3
];
hist
[
iyp
+
1
+
1
][
_ixp
[
3
]
+
1
](
_best_o
[
3
])
+=
_v00
[
3
];
hist
[
iyp
+
1
][
_ixp
[
4
]](
_best_o
[
4
])
+=
_v11
[
4
];
hist
[
iyp
+
1
+
1
][
_ixp
[
4
]](
_best_o
[
4
])
+=
_v01
[
4
];
hist
[
iyp
+
1
][
_ixp
[
4
]
+
1
](
_best_o
[
4
])
+=
_v10
[
4
];
hist
[
iyp
+
1
+
1
][
_ixp
[
4
]
+
1
](
_best_o
[
4
])
+=
_v00
[
4
];
hist
[
iyp
+
1
][
_ixp
[
5
]](
_best_o
[
5
])
+=
_v11
[
5
];
hist
[
iyp
+
1
+
1
][
_ixp
[
5
]](
_best_o
[
5
])
+=
_v01
[
5
];
hist
[
iyp
+
1
][
_ixp
[
5
]
+
1
](
_best_o
[
5
])
+=
_v10
[
5
];
hist
[
iyp
+
1
+
1
][
_ixp
[
5
]
+
1
](
_best_o
[
5
])
+=
_v00
[
5
];
hist
[
iyp
+
1
][
_ixp
[
6
]](
_best_o
[
6
])
+=
_v11
[
6
];
hist
[
iyp
+
1
+
1
][
_ixp
[
6
]](
_best_o
[
6
])
+=
_v01
[
6
];
hist
[
iyp
+
1
][
_ixp
[
6
]
+
1
](
_best_o
[
6
])
+=
_v10
[
6
];
hist
[
iyp
+
1
+
1
][
_ixp
[
6
]
+
1
](
_best_o
[
6
])
+=
_v00
[
6
];
hist
[
iyp
+
1
][
_ixp
[
7
]](
_best_o
[
7
])
+=
_v11
[
7
];
hist
[
iyp
+
1
+
1
][
_ixp
[
7
]](
_best_o
[
7
])
+=
_v01
[
7
];
hist
[
iyp
+
1
][
_ixp
[
7
]
+
1
](
_best_o
[
7
])
+=
_v10
[
7
];
hist
[
iyp
+
1
+
1
][
_ixp
[
7
]
+
1
](
_best_o
[
7
])
+=
_v00
[
7
];
}
}
// Now process the right columns that don't fit into simd registers.
// Now process the right columns that don't fit into simd registers.
for
(;
x
<
visible_nc
;
x
++
)
for
(;
x
<
visible_nc
;
x
++
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment