Commit 60092335 authored by David Miller, committed by Davis E. King

GCC/Clang compatible SIMD code ./dlib/simd/simd**_vec.h (#414)

* GCC/Clang compatible vector extension SIMD code

* Minimal modifications to dlib for the simd_vec code to work, a few include changes and ifdefs

* Changed tabbing to spaces

* Allow type inference to binary ops on different types of same size

* Added cmake option USE_AUTO_VECTOR, and fixed up preprocessor checks.
It is required to build with gcc/clang auto vectorization

* Changed to intrinsic version due to poor auto vectorization results.
The simd8*_vec are just copies of the C code right now.

* Removed _vec variants, added to existing defines. simd_check.h back in place and removed from dlib/simd.h
parent f6ece5d2
...@@ -32,9 +32,10 @@ endif() ...@@ -32,9 +32,10 @@ endif()
set(gcc_like_compilers GNU Clang Intel) set(gcc_like_compilers GNU Clang Intel)
set(intel_archs x86_64 i386 i686) set(intel_archs x86_64 i386 i686)
# Setup some options to allow a user to enable SSE and AVX instruction use. # Setup some options to allow a user to enable SSE and AVX instruction use.
if ((";${gcc_like_compilers};" MATCHES ";${CMAKE_CXX_COMPILER_ID};") AND if ((";${gcc_like_compilers};" MATCHES ";${CMAKE_CXX_COMPILER_ID};") AND
(";${intel_archs};" MATCHES ";${CMAKE_SYSTEM_PROCESSOR};")) (";${intel_archs};" MATCHES ";${CMAKE_SYSTEM_PROCESSOR};") AND NOT USE_AUTO_VECTOR)
option(USE_SSE2_INSTRUCTIONS "Compile your program with SSE2 instructions" OFF) option(USE_SSE2_INSTRUCTIONS "Compile your program with SSE2 instructions" OFF)
option(USE_SSE4_INSTRUCTIONS "Compile your program with SSE4 instructions" OFF) option(USE_SSE4_INSTRUCTIONS "Compile your program with SSE4 instructions" OFF)
option(USE_AVX_INSTRUCTIONS "Compile your program with AVX instructions" OFF) option(USE_AVX_INSTRUCTIONS "Compile your program with AVX instructions" OFF)
......
...@@ -11,8 +11,7 @@ ...@@ -11,8 +11,7 @@
#include "assign_image.h" #include "assign_image.h"
#include "draw.h" #include "draw.h"
#include "interpolation.h" #include "interpolation.h"
#include "../simd/simd4i.h" #include "../simd.h"
#include "../simd/simd4f.h"
namespace dlib namespace dlib
{ {
......
...@@ -83,6 +83,58 @@ namespace dlib ...@@ -83,6 +83,58 @@ namespace dlib
private: private:
__m128 x; __m128 x;
}; };
#elif defined(DLIB_HAVE_VSX)
class simd4f
{
typedef union {
vector float v;
float x[4];
} v4f;
v4f x;
public:
inline simd4f() : x{0,0,0,0} {}
inline simd4f(const simd4f& v) : x(v.x) { }
inline simd4f(const vector float& v) : x{v} { }
inline simd4f(const simd4i& v) {
x.x[0]=v[0]; x.x[1]=v[1]; x.x[2]=v[2]; x.x[3]=v[3];
}
inline simd4f(float f) : x{f,f,f,f} { }
inline simd4f(float r0, float r1, float r2, float r3)
: x{r0,r1,r2,r3} { }
inline simd4f& operator=(const simd4f& v) { x = v.x; return *this; }
inline simd4f& operator=(const float& v) { *this = simd4f(v); return *this; }
inline vector float operator() () const { return x.v; }
inline float operator[](unsigned int idx) const { return x.x[idx]; }
inline void load_aligned(const float* ptr) { x.v = vec_ld(0, ptr); }
inline void store_aligned(float* ptr) const { vec_st(x.v, 0, ptr); }
inline void load(const float* ptr) { x.v = vec_vsx_ld(0, ptr); }
inline void store(float* ptr) const { vec_vsx_st(x.v, 0, ptr); }
// truncate to 32bit integers
inline operator simd4i::rawarray() const
{
simd4i::rawarray temp;
temp.v.x[0] = x.x[0];
temp.v.x[1] = x.x[1];
temp.v.x[2] = x.x[2];
temp.v.x[3] = x.x[3];
return temp;
}
};
typedef simd4i simd4f_bool;
#else #else
class simd4f class simd4f
{ {
...@@ -190,6 +242,8 @@ namespace dlib ...@@ -190,6 +242,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_add_ps(lhs, rhs); return _mm_add_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_add(lhs(), rhs());
#else #else
return simd4f(lhs[0]+rhs[0], return simd4f(lhs[0]+rhs[0],
lhs[1]+rhs[1], lhs[1]+rhs[1],
...@@ -206,6 +260,8 @@ namespace dlib ...@@ -206,6 +260,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_sub_ps(lhs, rhs); return _mm_sub_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_sub(lhs(), rhs());
#else #else
return simd4f(lhs[0]-rhs[0], return simd4f(lhs[0]-rhs[0],
lhs[1]-rhs[1], lhs[1]-rhs[1],
...@@ -222,6 +278,8 @@ namespace dlib ...@@ -222,6 +278,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_mul_ps(lhs, rhs); return _mm_mul_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_mul(lhs(), rhs());
#else #else
return simd4f(lhs[0]*rhs[0], return simd4f(lhs[0]*rhs[0],
lhs[1]*rhs[1], lhs[1]*rhs[1],
...@@ -238,6 +296,8 @@ namespace dlib ...@@ -238,6 +296,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_div_ps(lhs, rhs); return _mm_div_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_div(lhs(), rhs());
#else #else
return simd4f(lhs[0]/rhs[0], return simd4f(lhs[0]/rhs[0],
lhs[1]/rhs[1], lhs[1]/rhs[1],
...@@ -254,6 +314,8 @@ namespace dlib ...@@ -254,6 +314,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_cmpeq_ps(lhs, rhs); return _mm_cmpeq_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_cmpeq(lhs(), rhs());
#else #else
return simd4f_bool(lhs[0]==rhs[0], return simd4f_bool(lhs[0]==rhs[0],
lhs[1]==rhs[1], lhs[1]==rhs[1],
...@@ -268,6 +330,8 @@ namespace dlib ...@@ -268,6 +330,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_cmpneq_ps(lhs, rhs); return _mm_cmpneq_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return ~(lhs==rhs); // simd4f_bool is simd4i typedef, can use ~
#else #else
return simd4f_bool(lhs[0]!=rhs[0], return simd4f_bool(lhs[0]!=rhs[0],
lhs[1]!=rhs[1], lhs[1]!=rhs[1],
...@@ -282,6 +346,8 @@ namespace dlib ...@@ -282,6 +346,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_cmplt_ps(lhs, rhs); return _mm_cmplt_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_cmplt(lhs(), rhs());
#else #else
return simd4f_bool(lhs[0]<rhs[0], return simd4f_bool(lhs[0]<rhs[0],
lhs[1]<rhs[1], lhs[1]<rhs[1],
...@@ -303,6 +369,8 @@ namespace dlib ...@@ -303,6 +369,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_cmple_ps(lhs, rhs); return _mm_cmple_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_cmple(lhs(), rhs());
#else #else
return simd4f_bool(lhs[0]<=rhs[0], return simd4f_bool(lhs[0]<=rhs[0],
lhs[1]<=rhs[1], lhs[1]<=rhs[1],
...@@ -324,6 +392,8 @@ namespace dlib ...@@ -324,6 +392,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_min_ps(lhs, rhs); return _mm_min_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_min(lhs(), rhs());
#else #else
return simd4f(std::min(lhs[0],rhs[0]), return simd4f(std::min(lhs[0],rhs[0]),
std::min(lhs[1],rhs[1]), std::min(lhs[1],rhs[1]),
...@@ -338,6 +408,8 @@ namespace dlib ...@@ -338,6 +408,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_max_ps(lhs, rhs); return _mm_max_ps(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_max(lhs(), rhs());
#else #else
return simd4f(std::max(lhs[0],rhs[0]), return simd4f(std::max(lhs[0],rhs[0]),
std::max(lhs[1],rhs[1]), std::max(lhs[1],rhs[1]),
...@@ -352,6 +424,8 @@ namespace dlib ...@@ -352,6 +424,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_rcp_ps(item); return _mm_rcp_ps(item);
#elif defined(DLIB_HAVE_VSX)
return vec_re(item());
#else #else
return simd4f(1.0f/item[0], return simd4f(1.0f/item[0],
1.0f/item[1], 1.0f/item[1],
...@@ -366,6 +440,8 @@ namespace dlib ...@@ -366,6 +440,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_rsqrt_ps(item); return _mm_rsqrt_ps(item);
#elif defined(DLIB_HAVE_VSX)
return vec_rsqrt(item());
#else #else
return simd4f(1.0f/std::sqrt(item[0]), return simd4f(1.0f/std::sqrt(item[0]),
1.0f/std::sqrt(item[1]), 1.0f/std::sqrt(item[1]),
...@@ -410,6 +486,8 @@ namespace dlib ...@@ -410,6 +486,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_sqrt_ps(item); return _mm_sqrt_ps(item);
#elif defined(DLIB_HAVE_VSX)
return vec_sqrt(item());
#else #else
return simd4f(std::sqrt(item[0]), return simd4f(std::sqrt(item[0]),
std::sqrt(item[1]), std::sqrt(item[1]),
...@@ -434,6 +512,8 @@ namespace dlib ...@@ -434,6 +512,8 @@ namespace dlib
simd4f temp2; simd4f temp2;
temp2.load(temp); temp2.load(temp);
return temp2; return temp2;
#elif defined(DLIB_HAVE_VSX)
return vec_ceil(item());
#else #else
return simd4f(std::ceil(item[0]), return simd4f(std::ceil(item[0]),
std::ceil(item[1]), std::ceil(item[1]),
...@@ -458,6 +538,8 @@ namespace dlib ...@@ -458,6 +538,8 @@ namespace dlib
simd4f temp2; simd4f temp2;
temp2.load(temp); temp2.load(temp);
return temp2; return temp2;
#elif defined(DLIB_HAVE_VSX)
return vec_floor(item());
#else #else
return simd4f(std::floor(item[0]), return simd4f(std::floor(item[0]),
std::floor(item[1]), std::floor(item[1]),
......
...@@ -44,6 +44,52 @@ namespace dlib ...@@ -44,6 +44,52 @@ namespace dlib
private: private:
__m128i x; __m128i x;
}; };
#elif defined(DLIB_HAVE_VSX)
// VSX-backed vector of four 32-bit signed integers.
class simd4i
{
    // Backing storage: the same 128 bits viewed as a signed-int vector (v),
    // a boolean/mask vector (b), or four individual lanes (x).
    typedef union {
        vector signed int v;
        vector bool int b;
        signed int x[4];
    } v4i;
    v4i x;
public:
    // All four lanes start at zero.
    inline simd4i() : x{0,0,0,0} { }
    inline simd4i(const simd4i& v) : x(v.x) { }
    // Wrap a raw signed-int vector value.
    inline simd4i(const vector int& v) : x{v} { }
    // Wrap a boolean vector, e.g. the result of a vec_cmp* intrinsic.
    inline simd4i(const vector bool int& b) { x.b=b; }
    // Broadcast f into every lane.
    inline simd4i(int32 f) : x{f,f,f,f} { }
    // Lane-by-lane construction; r0 becomes lane 0.
    inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3)
         : x{r0,r1,r2,r3} { }

    inline simd4i& operator=(const simd4i& v) { x = v.x; return *this; }
    // Assigning a scalar broadcasts it into every lane.
    inline simd4i& operator=(const int32& v) { *this = simd4i(v); return *this; }

    // Raw signed-int vector value, for passing to the vec_* intrinsics.
    inline vector signed int operator() () const { return x.v; }
    // Read lane idx; no bounds check is performed.
    inline int32 operator[](unsigned int idx) const { return x.x[idx]; }
    // The same bits viewed as a boolean vector (used as a select mask).
    inline vector bool int to_bool() const { return x.b; }

    // The intrinsics seem to emit xxpermdi automatically now.
    // load_aligned/store_aligned go through vec_ld/vec_st,
    // load/store through vec_vsx_ld/vec_vsx_st.
    inline void load_aligned(const int32* ptr) { x.v = vec_ld(0, ptr); }
    inline void store_aligned(int32* ptr) const { vec_st(x.v, 0, ptr); }
    inline void load(const int32* ptr) { x.v = vec_vsx_ld(0, ptr); }
    inline void store(int32* ptr) const { vec_vsx_st(x.v, 0, ptr); }

    // Plain wrapper around the storage union; simd4f fills one of these in
    // lane by lane when converting itself to integers.
    struct rawarray
    {
        v4i v;
    };
    // Parenthesised copy-initialisation is deliberate: v4i is an aggregate,
    // and compilers that predate CWG 1467 treat x{a.v} as an attempt to
    // aggregate-initialise the first union member from a v4i, which fails.
    inline simd4i(const rawarray& a) : x(a.v) { }
};
#else #else
class simd4i class simd4i
...@@ -117,6 +163,8 @@ namespace dlib ...@@ -117,6 +163,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_add_epi32(lhs, rhs); return _mm_add_epi32(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_add(lhs(), rhs());
#else #else
return simd4i(lhs[0]+rhs[0], return simd4i(lhs[0]+rhs[0],
lhs[1]+rhs[1], lhs[1]+rhs[1],
...@@ -133,6 +181,8 @@ namespace dlib ...@@ -133,6 +181,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_sub_epi32(lhs, rhs); return _mm_sub_epi32(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_sub(lhs(), rhs());
#else #else
return simd4i(lhs[0]-rhs[0], return simd4i(lhs[0]-rhs[0],
lhs[1]-rhs[1], lhs[1]-rhs[1],
...@@ -156,6 +206,10 @@ namespace dlib ...@@ -156,6 +206,10 @@ namespace dlib
_lhs[1]*_rhs[1], _lhs[1]*_rhs[1],
_lhs[2]*_rhs[2], _lhs[2]*_rhs[2],
_lhs[3]*_rhs[3]); _lhs[3]*_rhs[3]);
#elif defined(DLIB_HAVE_VSX)
vector int a = lhs(), b = rhs();
asm("vmuluwm %0, %0, %1\n\t" : "+&v" (a) : "v" (b) );
return simd4i(a);
#else #else
return simd4i(lhs[0]*rhs[0], return simd4i(lhs[0]*rhs[0],
lhs[1]*rhs[1], lhs[1]*rhs[1],
...@@ -172,6 +226,8 @@ namespace dlib ...@@ -172,6 +226,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_and_si128(lhs, rhs); return _mm_and_si128(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_and(lhs(), rhs());
#else #else
return simd4i(lhs[0]&rhs[0], return simd4i(lhs[0]&rhs[0],
lhs[1]&rhs[1], lhs[1]&rhs[1],
...@@ -188,6 +244,8 @@ namespace dlib ...@@ -188,6 +244,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_or_si128(lhs, rhs); return _mm_or_si128(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_or(lhs(), rhs());
#else #else
return simd4i(lhs[0]|rhs[0], return simd4i(lhs[0]|rhs[0],
lhs[1]|rhs[1], lhs[1]|rhs[1],
...@@ -204,6 +262,8 @@ namespace dlib ...@@ -204,6 +262,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_xor_si128(lhs, rhs); return _mm_xor_si128(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_xor(lhs(), rhs());
#else #else
return simd4i(lhs[0]^rhs[0], return simd4i(lhs[0]^rhs[0],
lhs[1]^rhs[1], lhs[1]^rhs[1],
...@@ -220,6 +280,8 @@ namespace dlib ...@@ -220,6 +280,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_xor_si128(lhs, _mm_set1_epi32(0xFFFFFFFF)); return _mm_xor_si128(lhs, _mm_set1_epi32(0xFFFFFFFF));
#elif defined(DLIB_HAVE_VSX)
return vec_xor(lhs(), vec_splats(~0));
#else #else
return simd4i(~lhs[0], return simd4i(~lhs[0],
~lhs[1], ~lhs[1],
...@@ -234,6 +296,8 @@ namespace dlib ...@@ -234,6 +296,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_sll_epi32(lhs,_mm_cvtsi32_si128(rhs)); return _mm_sll_epi32(lhs,_mm_cvtsi32_si128(rhs));
#elif defined(DLIB_HAVE_VSX)
return vec_sl(lhs(), vec_splats((uint32_t)rhs));
#else #else
return simd4i(lhs[0]<<rhs, return simd4i(lhs[0]<<rhs,
lhs[1]<<rhs, lhs[1]<<rhs,
...@@ -250,6 +314,8 @@ namespace dlib ...@@ -250,6 +314,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_sra_epi32(lhs,_mm_cvtsi32_si128(rhs)); return _mm_sra_epi32(lhs,_mm_cvtsi32_si128(rhs));
#elif defined(DLIB_HAVE_VSX)
return vec_sr(lhs(), vec_splats((uint32_t)rhs));
#else #else
return simd4i(lhs[0]>>rhs, return simd4i(lhs[0]>>rhs,
lhs[1]>>rhs, lhs[1]>>rhs,
...@@ -266,6 +332,8 @@ namespace dlib ...@@ -266,6 +332,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_cmpeq_epi32(lhs, rhs); return _mm_cmpeq_epi32(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_cmpeq(lhs(), rhs());
#else #else
return simd4i(lhs[0]==rhs[0] ? 0xFFFFFFFF : 0, return simd4i(lhs[0]==rhs[0] ? 0xFFFFFFFF : 0,
lhs[1]==rhs[1] ? 0xFFFFFFFF : 0, lhs[1]==rhs[1] ? 0xFFFFFFFF : 0,
...@@ -278,7 +346,7 @@ namespace dlib ...@@ -278,7 +346,7 @@ namespace dlib
inline simd4i operator!= (const simd4i& lhs, const simd4i& rhs) inline simd4i operator!= (const simd4i& lhs, const simd4i& rhs)
{ {
#ifdef DLIB_HAVE_SSE2 #if defined(DLIB_HAVE_SSE2) || defined(DLIB_HAVE_VSX)
return ~(lhs==rhs); return ~(lhs==rhs);
#else #else
return simd4i(lhs[0]!=rhs[0] ? 0xFFFFFFFF : 0, return simd4i(lhs[0]!=rhs[0] ? 0xFFFFFFFF : 0,
...@@ -294,6 +362,8 @@ namespace dlib ...@@ -294,6 +362,8 @@ namespace dlib
{ {
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
return _mm_cmplt_epi32(lhs, rhs); return _mm_cmplt_epi32(lhs, rhs);
#elif defined(DLIB_HAVE_VSX)
return vec_cmplt(lhs(), rhs());
#else #else
return simd4i(lhs[0]<rhs[0] ? 0xFFFFFFFF : 0, return simd4i(lhs[0]<rhs[0] ? 0xFFFFFFFF : 0,
lhs[1]<rhs[1] ? 0xFFFFFFFF : 0, lhs[1]<rhs[1] ? 0xFFFFFFFF : 0,
...@@ -343,6 +413,8 @@ namespace dlib ...@@ -343,6 +413,8 @@ namespace dlib
std::min(_lhs[1],_rhs[1]), std::min(_lhs[1],_rhs[1]),
std::min(_lhs[2],_rhs[2]), std::min(_lhs[2],_rhs[2]),
std::min(_lhs[3],_rhs[3])); std::min(_lhs[3],_rhs[3]));
#elif defined(DLIB_HAVE_VSX)
return vec_min(lhs(), rhs());
#else #else
return simd4i(std::min(lhs[0],rhs[0]), return simd4i(std::min(lhs[0],rhs[0]),
std::min(lhs[1],rhs[1]), std::min(lhs[1],rhs[1]),
...@@ -364,6 +436,8 @@ namespace dlib ...@@ -364,6 +436,8 @@ namespace dlib
std::max(_lhs[1],_rhs[1]), std::max(_lhs[1],_rhs[1]),
std::max(_lhs[2],_rhs[2]), std::max(_lhs[2],_rhs[2]),
std::max(_lhs[3],_rhs[3])); std::max(_lhs[3],_rhs[3]));
#elif defined(DLIB_HAVE_VSX)
return vec_max(lhs(), rhs());
#else #else
return simd4i(std::max(lhs[0],rhs[0]), return simd4i(std::max(lhs[0],rhs[0]),
std::max(lhs[1],rhs[1]), std::max(lhs[1],rhs[1]),
...@@ -398,6 +472,8 @@ namespace dlib ...@@ -398,6 +472,8 @@ namespace dlib
return _mm_blendv_epi8(b,a,cmp); return _mm_blendv_epi8(b,a,cmp);
#elif defined(DLIB_HAVE_SSE2) #elif defined(DLIB_HAVE_SSE2)
return ((cmp&a) | _mm_andnot_si128(cmp,b)); return ((cmp&a) | _mm_andnot_si128(cmp,b));
#elif defined(DLIB_HAVE_VSX)
return vec_sel(b(), a(), cmp.to_bool());
#else #else
return ((cmp&a) | (~cmp&b)); return ((cmp&a) | (~cmp&b));
#endif #endif
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#include "simd4f.h" #include "simd4f.h"
#include "simd8i.h" #include "simd8i.h"
namespace dlib namespace dlib
{ {
#ifdef DLIB_HAVE_AVX #ifdef DLIB_HAVE_AVX
......
...@@ -51,12 +51,33 @@ ...@@ -51,12 +51,33 @@
#define DLIB_HAVE_AVX2 #define DLIB_HAVE_AVX2
#endif #endif
#endif #endif
// POWER vector feature detection: mirror each compiler-provided macro with a
// DLIB_HAVE_* macro, leaving any definition that already exists untouched.
#if defined(__ALTIVEC__) && !defined(DLIB_HAVE_ALTIVEC)
    #define DLIB_HAVE_ALTIVEC
#endif

#if defined(__VSX__) && !defined(DLIB_HAVE_VSX)
    #define DLIB_HAVE_VSX
#endif

// __VEC__ (noted as 10206 by the original author) signals that the `vector`
// keyword and the vec_ intrinsics are available.
#if defined(__VEC__) && !defined(DLIB_HAVE_POWER_VEC)
    #define DLIB_HAVE_POWER_VEC
#endif
#endif #endif
#endif #endif
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
#ifdef DLIB_HAVE_ALTIVEC
#include <altivec.h>
#endif
#ifdef DLIB_HAVE_SSE2 #ifdef DLIB_HAVE_SSE2
#include <xmmintrin.h> #include <xmmintrin.h>
#include <emmintrin.h> #include <emmintrin.h>
...@@ -77,6 +98,8 @@ ...@@ -77,6 +98,8 @@
// #include <avx2intrin.h> // #include <avx2intrin.h>
#endif #endif
#endif // DLIB_SIMd_CHECK_Hh_ #endif // DLIB_SIMd_CHECK_Hh_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment