Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
80d36f43
Commit
80d36f43
authored
Nov 11, 2013
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fleshed out the AVX SIMD support
parent
4fec4476
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
609 additions
and
15 deletions
+609
-15
simd.h
dlib/simd.h
+1
-0
simd8f.h
dlib/simd/simd8f.h
+244
-0
simd8i.h
dlib/simd/simd8i.h
+339
-0
simd_check.h
dlib/simd/simd_check.h
+25
-15
No files found.
dlib/simd.h
View file @
80d36f43
...
...
@@ -6,6 +6,7 @@
#include "simd/simd4f.h"
#include "simd/simd4i.h"
#include "simd/simd8f.h"
#include "simd/simd8i.h"
#endif // DLIB_SIMd_H__
dlib/simd/simd8f.h
View file @
80d36f43
...
...
@@ -5,6 +5,7 @@
#include "simd_check.h"
#include "simd4f.h"
#include "simd8i.h"
namespace
dlib
...
...
@@ -24,6 +25,7 @@ namespace dlib
inline
simd8f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
,
float
r4
,
float
r5
,
float
r6
,
float
r7
)
{
x
=
_mm256_setr_ps
(
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
}
simd8f
(
const
simd8i
&
val
)
:
x
(
_mm256_cvtepi32_ps
(
val
))
{}
simd8f
(
const
__m256
&
val
)
:
x
(
val
)
{}
simd8f
&
operator
=
(
const
__m256
&
val
)
{
...
...
@@ -32,6 +34,9 @@ namespace dlib
}
// Implicit conversion back to the raw register, so a simd8f can be passed
// straight to _mm256_* intrinsics.
inline operator __m256() const
{
    return x;
}
// Conversion to packed 32-bit integers.  Each float lane is truncated
// (_mm256_cvttps_epi32) rather than rounded.
operator __m256i() const
{
    return _mm256_cvttps_epi32(x);
}
// Fills the vector from memory at ptr using the aligned load _mm256_load_ps.
// NOTE(review): presumably ptr must be 32-byte aligned and reference at
// least 8 floats - confirm against callers.
void load_aligned(const type* ptr)
{
    x = _mm256_load_ps(ptr);
}
// Writes the vector to memory at ptr using the aligned store _mm256_store_ps.
// NOTE(review): presumably ptr must be 32-byte aligned with room for
// 8 floats - confirm against callers.
void store_aligned(type* ptr) const
{
    _mm256_store_ps(ptr, x);
}
// Fills the vector from memory at ptr using the unaligned load
// _mm256_loadu_ps.
void load(const type* ptr)
{
    x = _mm256_loadu_ps(ptr);
}
...
...
@@ -51,6 +56,33 @@ namespace dlib
private
:
__m256
x
;
};
class
simd8f_bool
{
public
:
typedef
float
type
;
simd8f_bool
()
{}
simd8f_bool
(
const
__m256
&
val
)
:
x
(
val
)
{}
simd8f_bool
(
const
simd4f_bool
&
low
,
const
simd4f_bool
&
high
)
{
x
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
low
),
high
,
1
);
}
simd8f_bool
&
operator
=
(
const
__m256
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m256
()
const
{
return
x
;
}
private
:
__m256
x
;
};
#else
class
simd8f
{
...
...
@@ -62,6 +94,16 @@ namespace dlib
// Broadcast constructor: both 4-lane halves are built from the same scalar f.
simd8f(float f)
    : _low(f),
      _high(f)
{
}
// Element-wise constructor: r0..r3 go to the low half, r4..r7 to the
// high half.
simd8f(float r0, float r1, float r2, float r3,
       float r4, float r5, float r6, float r7)
    : _low(r0, r1, r2, r3),
      _high(r4, r5, r6, r7)
{
}
// Builds the float vector from an integer vector by converting each
// 4-lane half of val separately.
simd8f(const simd8i& val)
    : _low(val.low()),
      _high(val.high())
{
}
// Conversion to 32-bit integers (truncating), expressed through
// simd8i::rawarray: each float half is assigned to the matching
// integer half.
operator simd8i::rawarray() const
{
    simd8i::rawarray halves;
    halves.low = _low;
    halves.high = _high;
    return halves;
}
void
load_aligned
(
const
type
*
ptr
)
{
_low
.
load_aligned
(
ptr
);
_high
.
load_aligned
(
ptr
+
4
);
}
// Aligned store of 8 floats: the low half goes to ptr[0..3] and the high
// half to ptr[4..7].
void store_aligned(type* ptr) const
{
    type* const upper = ptr + 4;
    _low.store_aligned(ptr);
    _high.store_aligned(upper);
}
...
...
@@ -83,6 +125,21 @@ namespace dlib
private
:
simd4f
_low
,
_high
;
};
// Fallback (non-AVX) flavour of the simd8f comparison result: it simply
// carries two 4-lane simd4f_bool halves.
class simd8f_bool
{
public:
    typedef float type;

    // Leaves both halves default-constructed.
    simd8f_bool() {}

    // Stores the given lower and upper halves.
    simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_)
        : _low(low_),
          _high(high_)
    {
    }

    // Result for lanes 0..3.
    simd4f_bool low() const
    {
        return _low;
    }

    // Result for lanes 4..7.
    simd4f_bool high() const
    {
        return _high;
    }

private:
    simd4f_bool _low;
    simd4f_bool _high;
};
#endif
// ----------------------------------------------------------------------------------------
...
...
@@ -110,6 +167,20 @@ namespace dlib
// In-place lane-wise addition: stores lhs + rhs into lhs and returns lhs.
inline simd8f& operator+= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs + rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise subtraction of two 8-float vectors (lhs - rhs).
inline simd8f operator- (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    const __m256 diff = _mm256_sub_ps(lhs, rhs);
    return diff;
#else
    // Without AVX the two 4-lane halves are subtracted independently.
    const simd4f lo = lhs.low() - rhs.low();
    const simd4f hi = lhs.high() - rhs.high();
    return simd8f(lo, hi);
#endif
}
// In-place lane-wise subtraction: stores lhs - rhs into lhs and returns lhs.
inline simd8f& operator-= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs - rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
inline
simd8f
operator
*
(
const
simd8f
&
lhs
,
const
simd8f
&
rhs
)
...
...
@@ -124,6 +195,130 @@ namespace dlib
// In-place lane-wise multiplication: stores lhs * rhs into lhs and
// returns lhs.
inline simd8f& operator*= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs * rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise division of two 8-float vectors (lhs / rhs).
inline simd8f operator/ (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    const __m256 quotient = _mm256_div_ps(lhs, rhs);
    return quotient;
#else
    // Without AVX the two 4-lane halves are divided independently.
    const simd4f lo = lhs.low() / rhs.low();
    const simd4f hi = lhs.high() / rhs.high();
    return simd8f(lo, hi);
#endif
}
// In-place lane-wise division: stores lhs / rhs into lhs and returns lhs.
inline simd8f& operator/= (simd8f& lhs, const simd8f& rhs)
{
    lhs = lhs / rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise equality test of two 8-float vectors.  The per-lane outcome is
// returned as a simd8f_bool, suitable for select().
inline simd8f_bool operator== (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_EQ_OQ (equal, ordered, quiet) is the named form of predicate 0.
    return _mm256_cmp_ps(lhs, rhs, _CMP_EQ_OQ);
#else
    return simd8f_bool(lhs.low() == rhs.low(), lhs.high() == rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise inequality test of two 8-float vectors.  The per-lane outcome
// is returned as a simd8f_bool, suitable for select().
inline simd8f_bool operator!= (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_NEQ_UQ (not-equal, unordered, quiet) is the named form of
    // predicate 4.
    return _mm256_cmp_ps(lhs, rhs, _CMP_NEQ_UQ);
#else
    return simd8f_bool(lhs.low() != rhs.low(), lhs.high() != rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than" test of two 8-float vectors.  The per-lane outcome
// is returned as a simd8f_bool, suitable for select().
inline simd8f_bool operator< (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_LT_OS (less-than, ordered, signaling) is the named form of
    // predicate 1.
    return _mm256_cmp_ps(lhs, rhs, _CMP_LT_OS);
#else
    return simd8f_bool(lhs.low() < rhs.low(), lhs.high() < rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than": implemented as operator< with the operands
// swapped.
inline simd8f_bool operator> (const simd8f& lhs, const simd8f& rhs)
{
    const simd8f_bool swapped_less = (rhs < lhs);
    return swapped_less;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than or equal" test of two 8-float vectors.  The per-lane
// outcome is returned as a simd8f_bool, suitable for select().
inline simd8f_bool operator<= (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    // _CMP_LE_OS (less-or-equal, ordered, signaling) is the named form of
    // predicate 2.
    return _mm256_cmp_ps(lhs, rhs, _CMP_LE_OS);
#else
    return simd8f_bool(lhs.low() <= rhs.low(), lhs.high() <= rhs.high());
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than or equal": implemented as operator<= with the
// operands swapped.
inline simd8f_bool operator>= (const simd8f& lhs, const simd8f& rhs)
{
    const simd8f_bool swapped_less_equal = (rhs <= lhs);
    return swapped_less_equal;
}
// ----------------------------------------------------------------------------------------
// Lane-wise minimum of two 8-float vectors.
inline simd8f min (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    const __m256 smaller = _mm256_min_ps(lhs, rhs);
    return smaller;
#else
    // Reduce each 4-lane half with the simd4f overload of min().
    const simd4f lo = min(lhs.low(), rhs.low());
    const simd4f hi = min(lhs.high(), rhs.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise maximum of two 8-float vectors.
inline simd8f max (const simd8f& lhs, const simd8f& rhs)
{
#ifdef DLIB_HAVE_AVX
    const __m256 larger = _mm256_max_ps(lhs, rhs);
    return larger;
#else
    // Reduce each 4-lane half with the simd4f overload of max().
    const simd4f lo = max(lhs.low(), rhs.low());
    const simd4f hi = max(lhs.high(), rhs.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise reciprocal of item.  The AVX build uses _mm256_rcp_ps; the
// fallback defers to the simd4f overload on each half.
inline simd8f reciprocal (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    const __m256 inv = _mm256_rcp_ps(item);
    return inv;
#else
    const simd4f lo = reciprocal(item.low());
    const simd4f hi = reciprocal(item.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise reciprocal square root of item.  The AVX build uses
// _mm256_rsqrt_ps; the fallback defers to the simd4f overload on each half.
inline simd8f reciprocal_sqrt (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    const __m256 inv_root = _mm256_rsqrt_ps(item);
    return inv_root;
#else
    const simd4f lo = reciprocal_sqrt(item.low());
    const simd4f hi = reciprocal_sqrt(item.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
inline
float
sum
(
const
simd8f
&
item
)
...
...
@@ -144,6 +339,55 @@ namespace dlib
return
sum
(
lhs
*
rhs
);
}
// ----------------------------------------------------------------------------------------
// Lane-wise square root of item.
inline simd8f sqrt (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    const __m256 root = _mm256_sqrt_ps(item);
    return root;
#else
    const simd4f lo = sqrt(item.low());
    const simd4f hi = sqrt(item.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise ceiling of item.
inline simd8f ceil (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    const __m256 rounded_up = _mm256_ceil_ps(item);
    return rounded_up;
#else
    const simd4f lo = ceil(item.low());
    const simd4f hi = ceil(item.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise floor of item.
inline simd8f floor (const simd8f& item)
{
#ifdef DLIB_HAVE_AVX
    const __m256 rounded_down = _mm256_floor_ps(item);
    return rounded_down;
#else
    const simd4f lo = floor(item.low());
    const simd4f hi = floor(item.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise conditional: computes cmp ? a : b for each of the 8 lanes.
inline simd8f select (const simd8f_bool& cmp, const simd8f& a, const simd8f& b)
{
#ifdef DLIB_HAVE_AVX
    // _mm256_blendv_ps takes the "false" operand first, hence (b, a, cmp).
    const __m256 picked = _mm256_blendv_ps(b, a, cmp);
    return picked;
#else
    const simd4f lo = select(cmp.low(), a.low(), b.low());
    const simd4f hi = select(cmp.high(), a.high(), b.high());
    return simd8f(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
}
...
...
dlib/simd/simd8i.h
0 → 100644
View file @
80d36f43
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_sIMD8I_H__
#define DLIB_sIMD8I_H__
#include "simd_check.h"
#include "../uintn.h"
namespace
dlib
{
#ifdef DLIB_HAVE_AVX
class
simd8i
{
public
:
typedef
int32
type
;
simd8i
()
{}
simd8i
(
int32
f
)
{
x
=
_mm256_set1_epi32
(
f
);
}
simd8i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
,
int32
r4
,
int32
r5
,
int32
r6
,
int32
r7
)
{
x
=
_mm256_setr_epi32
(
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
}
simd8i
(
const
__m256i
&
val
)
:
x
(
val
)
{}
simd8i
(
const
simd4i
&
low
,
const
simd4i
&
high
)
{
x
=
_mm256_insertf128_si256
(
_mm256_castsi128_si256
(
low
),
high
,
1
);
}
simd8i
&
operator
=
(
const
__m256i
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m256i
()
const
{
return
x
;
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_si256
((
const
__m256i
*
)
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_si256
((
__m256i
*
)
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_si256
((
const
__m256i
*
)
ptr
);
}
void
store
(
type
*
ptr
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
ptr
,
x
);
}
simd4i
low
()
const
{
return
_mm256_castsi256_si128
(
x
);
}
simd4i
high
()
const
{
return
_mm256_extractf128_si256
(
x
,
1
);
}
unsigned
int
size
()
const
{
return
4
;
}
int32
operator
[](
unsigned
int
idx
)
const
{
int32
temp
[
8
];
store
(
temp
);
return
temp
[
idx
];
}
private
:
__m256i
x
;
};
#else
// Fallback implementation of a vector of eight 32-bit signed integers,
// composed of two 4-lane simd4i halves.
class simd8i
{
public:
    typedef int32 type;

    // Leaves both halves default-constructed.
    simd8i() {}

    // Builds the vector from an explicit lower and upper half.
    simd8i(const simd4i& low_, const simd4i& high_)
        : _low(low_),
          _high(high_)
    {
    }

    // Broadcast constructor: both halves are built from the same scalar f.
    simd8i(int32 f)
        : _low(f),
          _high(f)
    {
    }

    // Element-wise constructor: r0..r3 form the low half, r4..r7 the high half.
    simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
           int32 r4, int32 r5, int32 r6, int32 r7)
        : _low(r0, r1, r2, r3),
          _high(r4, r5, r6, r7)
    {
    }

    // Plain pair of halves, accepted by the converting constructor below.
    struct rawarray
    {
        simd4i low;
        simd4i high;
    };

    // Builds the vector from a rawarray by copying its two halves.
    simd8i(const rawarray& a)
        : _low(a.low),
          _high(a.high)
    {
    }

    // Memory transfers: elements 0..3 map to the low half and elements
    // 4..7 to the high half.
    void load_aligned(const type* ptr)
    {
        _low.load_aligned(ptr);
        _high.load_aligned(ptr + 4);
    }

    void store_aligned(type* ptr) const
    {
        _low.store_aligned(ptr);
        _high.store_aligned(ptr + 4);
    }

    void load(const type* ptr)
    {
        _low.load(ptr);
        _high.load(ptr + 4);
    }

    void store(type* ptr) const
    {
        _low.store(ptr);
        _high.store(ptr + 4);
    }

    // Number of lanes.
    unsigned int size() const
    {
        return 8;
    }

    // Returns lane idx: lanes 0..3 come from the low half, everything
    // else from the high half (offset by 4).  idx is not range-checked.
    int32 operator[](unsigned int idx) const
    {
        return (idx < 4) ? _low[idx] : _high[idx - 4];
    }

    // Accessors for the two halves.
    simd4i low() const
    {
        return _low;
    }

    simd4i high() const
    {
        return _high;
    }

private:
    simd4i _low;
    simd4i _high;
};
#endif
// ----------------------------------------------------------------------------------------
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
out
,
const
simd8i
&
item
)
{
int32
temp
[
8
];
item
.
store
(
temp
);
out
<<
"("
<<
temp
[
0
]
<<
", "
<<
temp
[
1
]
<<
", "
<<
temp
[
2
]
<<
", "
<<
temp
[
3
]
<<
", "
<<
temp
[
4
]
<<
", "
<<
temp
[
5
]
<<
", "
<<
temp
[
6
]
<<
", "
<<
temp
[
7
]
<<
")"
;
return
out
;
}
// ----------------------------------------------------------------------------------------
// Lane-wise addition of two 8-integer vectors.
inline simd8i operator+ (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i total = _mm256_add_epi32(lhs, rhs);
    return total;
#else
    // Without AVX2 the two 4-lane halves are added independently.
    const simd4i lo = lhs.low() + rhs.low();
    const simd4i hi = lhs.high() + rhs.high();
    return simd8i(lo, hi);
#endif
}
// In-place lane-wise addition: stores lhs + rhs into lhs and returns lhs.
inline simd8i& operator+= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs + rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise subtraction of two 8-integer vectors (lhs - rhs).
inline simd8i operator- (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i diff = _mm256_sub_epi32(lhs, rhs);
    return diff;
#else
    // Without AVX2 the two 4-lane halves are subtracted independently.
    const simd4i lo = lhs.low() - rhs.low();
    const simd4i hi = lhs.high() - rhs.high();
    return simd8i(lo, hi);
#endif
}
// In-place lane-wise subtraction: stores lhs - rhs into lhs and returns lhs.
inline simd8i& operator-= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs - rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise multiplication of two 8-integer vectors.  The AVX2 path uses
// _mm256_mullo_epi32.
inline simd8i operator* (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i product = _mm256_mullo_epi32(lhs, rhs);
    return product;
#else
    // Without AVX2 the two 4-lane halves are multiplied independently.
    const simd4i lo = lhs.low() * rhs.low();
    const simd4i hi = lhs.high() * rhs.high();
    return simd8i(lo, hi);
#endif
}
// In-place lane-wise multiplication: stores lhs * rhs into lhs and
// returns lhs.
inline simd8i& operator*= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs * rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise AND of two 8-integer vectors.
inline simd8i operator& (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i masked = _mm256_and_si256(lhs, rhs);
    return masked;
#else
    const simd4i lo = lhs.low() & rhs.low();
    const simd4i hi = lhs.high() & rhs.high();
    return simd8i(lo, hi);
#endif
}
// In-place bitwise AND: stores lhs & rhs into lhs and returns lhs.
inline simd8i& operator&= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs & rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise OR of two 8-integer vectors.
inline simd8i operator| (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i merged = _mm256_or_si256(lhs, rhs);
    return merged;
#else
    const simd4i lo = lhs.low() | rhs.low();
    const simd4i hi = lhs.high() | rhs.high();
    return simd8i(lo, hi);
#endif
}
// In-place bitwise OR: stores lhs | rhs into lhs and returns lhs.
inline simd8i& operator|= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs | rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise XOR of two 8-integer vectors.
inline simd8i operator^ (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i toggled = _mm256_xor_si256(lhs, rhs);
    return toggled;
#else
    const simd4i lo = lhs.low() ^ rhs.low();
    const simd4i hi = lhs.high() ^ rhs.high();
    return simd8i(lo, hi);
#endif
}
// In-place bitwise XOR: stores lhs ^ rhs into lhs and returns lhs.
inline simd8i& operator^= (simd8i& lhs, const simd8i& rhs)
{
    lhs = lhs ^ rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Bitwise complement of every lane.
inline simd8i operator~ (const simd8i& lhs)
{
#ifdef DLIB_HAVE_AVX2
    // XOR against an all-ones pattern flips every bit.
    const __m256i all_ones = _mm256_set1_epi32(0xFFFFFFFF);
    return _mm256_xor_si256(lhs, all_ones);
#else
    const simd4i lo = ~lhs.low();
    const simd4i hi = ~lhs.high();
    return simd8i(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Shifts every lane left by rhs bits.
inline simd8i operator<< (const simd8i& lhs, const int& rhs)
{
#ifdef DLIB_HAVE_AVX2
    // _mm256_sll_epi32 takes the shift count in a 128-bit register.
    const __m128i count = _mm_cvtsi32_si128(rhs);
    return _mm256_sll_epi32(lhs, count);
#else
    const simd4i lo = lhs.low() << rhs;
    const simd4i hi = lhs.high() << rhs;
    return simd8i(lo, hi);
#endif
}
// In-place left shift: stores lhs << rhs into lhs and returns lhs.
inline simd8i& operator<<= (simd8i& lhs, const int& rhs)
{
    lhs = lhs << rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Shifts every lane right by rhs bits.  The AVX2 path uses the arithmetic
// shift _mm256_sra_epi32.
inline simd8i operator>> (const simd8i& lhs, const int& rhs)
{
#ifdef DLIB_HAVE_AVX2
    // _mm256_sra_epi32 takes the shift count in a 128-bit register.
    const __m128i count = _mm_cvtsi32_si128(rhs);
    return _mm256_sra_epi32(lhs, count);
#else
    const simd4i lo = lhs.low() >> rhs;
    const simd4i hi = lhs.high() >> rhs;
    return simd8i(lo, hi);
#endif
}
// In-place right shift: stores lhs >> rhs into lhs and returns lhs.
inline simd8i& operator>>= (simd8i& lhs, const int& rhs)
{
    lhs = lhs >> rhs;
    return lhs;
}
// ----------------------------------------------------------------------------------------
// Lane-wise equality comparison.  The per-lane result is returned as a
// simd8i, usable as the cmp argument of select().
inline simd8i operator== (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i equal_mask = _mm256_cmpeq_epi32(lhs, rhs);
    return equal_mask;
#else
    const simd4i lo = (lhs.low() == rhs.low());
    const simd4i hi = (lhs.high() == rhs.high());
    return simd8i(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise inequality: the bitwise complement of the == result.
inline simd8i operator!= (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i equal_mask = (lhs == rhs);
    return ~equal_mask;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than" comparison.  The per-lane result is returned as
// a simd8i, usable as the cmp argument of select().
inline simd8i operator> (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i greater_mask = _mm256_cmpgt_epi32(lhs, rhs);
    return greater_mask;
#else
    const simd4i lo = (lhs.low() > rhs.low());
    const simd4i hi = (lhs.high() > rhs.high());
    return simd8i(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than": implemented as operator> with the operands swapped.
inline simd8i operator< (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i swapped_greater = (rhs > lhs);
    return swapped_greater;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "less than or equal": the bitwise complement of the > result.
inline simd8i operator<= (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i greater_mask = (lhs > rhs);
    return ~greater_mask;
}
// ----------------------------------------------------------------------------------------
// Lane-wise "greater than or equal": implemented as operator<= with the
// operands swapped.
inline simd8i operator>= (const simd8i& lhs, const simd8i& rhs)
{
    const simd8i swapped_less_equal = (rhs <= lhs);
    return swapped_less_equal;
}
// ----------------------------------------------------------------------------------------
// Lane-wise minimum of two 8-integer vectors.
inline simd8i min (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i smaller = _mm256_min_epi32(lhs, rhs);
    return smaller;
#else
    // Reduce each 4-lane half with the simd4i overload of min().
    const simd4i lo = min(lhs.low(), rhs.low());
    const simd4i hi = min(lhs.high(), rhs.high());
    return simd8i(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Lane-wise maximum of two 8-integer vectors.
inline simd8i max (const simd8i& lhs, const simd8i& rhs)
{
#ifdef DLIB_HAVE_AVX2
    const __m256i larger = _mm256_max_epi32(lhs, rhs);
    return larger;
#else
    // Reduce each 4-lane half with the simd4i overload of max().
    const simd4i lo = max(lhs.low(), rhs.low());
    const simd4i hi = max(lhs.high(), rhs.high());
    return simd8i(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
// Horizontal sum of all eight lanes: the two halves are added lane-wise
// and the resulting 4-lane vector is reduced by the simd4i overload of sum().
inline int32 sum (const simd8i& item)
{
    const simd4i folded = item.low() + item.high();
    return sum(folded);
}
// ----------------------------------------------------------------------------------------
// Lane-wise conditional: computes cmp ? a : b.
inline simd8i select (const simd8i& cmp, const simd8i& a, const simd8i& b)
{
#ifdef DLIB_HAVE_AVX2
    // _mm256_blendv_epi8 takes the "false" operand first, hence (b, a, cmp).
    const __m256i picked = _mm256_blendv_epi8(b, a, cmp);
    return picked;
#else
    const simd4i lo = select(cmp.low(), a.low(), b.low());
    const simd4i hi = select(cmp.high(), a.high(), b.high());
    return simd8i(lo, hi);
#endif
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_sIMD8I_H__
dlib/simd/simd_check.h
View file @
80d36f43
...
...
@@ -3,6 +3,7 @@
#ifndef DLIB_SIMd_CHECK_H__
#define DLIB_SIMd_CHECK_H__
//#define DLIB_DO_NOT_USE_SIMD
// figure out which SIMD instructions we can use.
#ifndef DLIB_DO_NOT_USE_SIMD
...
...
@@ -27,29 +28,38 @@
#ifdef __AVX__
#define DLIB_HAVE_AVX
#endif
#ifdef __AVX2__
#define DLIB_HAVE_AVX2
#endif
#endif
#endif
// ----------------------------------------------------------------------------------------
#ifdef DLIB_HAVE_SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
#include <mmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE3
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE41
#include <smmintrin.h> // SSE4
#endif
#ifdef DLIB_HAVE_AVX
#include <immintrin.h> // AVX
#ifdef __GNUC__
#include <x86intrin.h>
#else
#ifdef DLIB_HAVE_SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
#include <mmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE3
#include <pmmintrin.h> // SSE3
#include <tmmintrin.h>
#endif
#ifdef DLIB_HAVE_SSE41
#include <smmintrin.h> // SSE4
#endif
#ifdef DLIB_HAVE_AVX
#include <immintrin.h> // AVX
#endif
#ifdef DLIB_HAVE_AVX2
// AVX2 intrinsics are declared through <immintrin.h>.  <avx2intrin.h> is an
// internal GCC/Clang header that refuses direct inclusion, and this branch
// is only reached when __GNUC__ is not defined.
#include <immintrin.h> // AVX2
#endif
#endif
#endif // DLIB_SIMd_CHECK_H__
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment