Commit 7e7943cd authored by Davis King

Made all the SIMD functions explicitly inline because otherwise Visual Studio 2010 won't inline them.
parent f2cc77aa
...@@ -17,11 +17,11 @@ namespace dlib ...@@ -17,11 +17,11 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd4f() {} inline simd4f() {}
simd4f(float f) { x = _mm_set1_ps(f); } inline simd4f(float f) { x = _mm_set1_ps(f); }
simd4f(float r0, float r1, float r2, float r3) { x = _mm_setr_ps(r0,r1,r2,r3); } inline simd4f(float r0, float r1, float r2, float r3) { x = _mm_setr_ps(r0,r1,r2,r3); }
simd4f(const __m128& val):x(val) {} inline simd4f(const __m128& val):x(val) {}
simd4f(const simd4i& val):x(_mm_cvtepi32_ps(val)) {} inline simd4f(const simd4i& val):x(_mm_cvtepi32_ps(val)) {}
inline simd4f& operator=(const simd4i& val) inline simd4f& operator=(const simd4i& val)
{ {
...@@ -29,24 +29,24 @@ namespace dlib ...@@ -29,24 +29,24 @@ namespace dlib
return *this; return *this;
} }
simd4f& operator=(const __m128& val) inline simd4f& operator=(const __m128& val)
{ {
x = val; x = val;
return *this; return *this;
} }
operator __m128() const { return x; } inline operator __m128() const { return x; }
// truncate to 32bit integers // truncate to 32bit integers
operator __m128i() const { return _mm_cvttps_epi32(x); } inline operator __m128i() const { return _mm_cvttps_epi32(x); }
void load_aligned(const type* ptr) { x = _mm_load_ps(ptr); } inline void load_aligned(const type* ptr) { x = _mm_load_ps(ptr); }
void store_aligned(type* ptr) const { _mm_store_ps(ptr, x); } inline void store_aligned(type* ptr) const { _mm_store_ps(ptr, x); }
void load(const type* ptr) { x = _mm_loadu_ps(ptr); } inline void load(const type* ptr) { x = _mm_loadu_ps(ptr); }
void store(type* ptr) const { _mm_storeu_ps(ptr, x); } inline void store(type* ptr) const { _mm_storeu_ps(ptr, x); }
unsigned int size() const { return 4; } inline unsigned int size() const { return 4; }
float operator[](unsigned int idx) const inline float operator[](unsigned int idx) const
{ {
float temp[4]; float temp[4];
store(temp); store(temp);
...@@ -62,16 +62,16 @@ namespace dlib ...@@ -62,16 +62,16 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd4f_bool() {} inline simd4f_bool() {}
simd4f_bool(const __m128& val):x(val) {} inline simd4f_bool(const __m128& val):x(val) {}
simd4f_bool& operator=(const __m128& val) inline simd4f_bool& operator=(const __m128& val)
{ {
x = val; x = val;
return *this; return *this;
} }
operator __m128() const { return x; } inline operator __m128() const { return x; }
private: private:
...@@ -83,13 +83,13 @@ namespace dlib ...@@ -83,13 +83,13 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd4f() {} inline simd4f() {}
simd4f(float f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; } inline simd4f(float f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
simd4f(float r0, float r1, float r2, float r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;} inline simd4f(float r0, float r1, float r2, float r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
simd4f(const simd4i& val) { x[0]=val[0]; x[1]=val[1]; x[2]=val[2]; x[3]=val[3];} inline simd4f(const simd4i& val) { x[0]=val[0]; x[1]=val[1]; x[2]=val[2]; x[3]=val[3];}
// truncate to 32bit integers // truncate to 32bit integers
operator simd4i::rawarray() const inline operator simd4i::rawarray() const
{ {
simd4i::rawarray temp; simd4i::rawarray temp;
temp.a[0] = (int32)x[0]; temp.a[0] = (int32)x[0];
...@@ -109,7 +109,7 @@ namespace dlib ...@@ -109,7 +109,7 @@ namespace dlib
} }
void load_aligned(const type* ptr) inline void load_aligned(const type* ptr)
{ {
x[0] = ptr[0]; x[0] = ptr[0];
x[1] = ptr[1]; x[1] = ptr[1];
...@@ -117,7 +117,7 @@ namespace dlib ...@@ -117,7 +117,7 @@ namespace dlib
x[3] = ptr[3]; x[3] = ptr[3];
} }
void store_aligned(type* ptr) const inline void store_aligned(type* ptr) const
{ {
ptr[0] = x[0]; ptr[0] = x[0];
ptr[1] = x[1]; ptr[1] = x[1];
...@@ -125,7 +125,7 @@ namespace dlib ...@@ -125,7 +125,7 @@ namespace dlib
ptr[3] = x[3]; ptr[3] = x[3];
} }
void load(const type* ptr) inline void load(const type* ptr)
{ {
x[0] = ptr[0]; x[0] = ptr[0];
x[1] = ptr[1]; x[1] = ptr[1];
...@@ -133,7 +133,7 @@ namespace dlib ...@@ -133,7 +133,7 @@ namespace dlib
x[3] = ptr[3]; x[3] = ptr[3];
} }
void store(type* ptr) const inline void store(type* ptr) const
{ {
ptr[0] = x[0]; ptr[0] = x[0];
ptr[1] = x[1]; ptr[1] = x[1];
...@@ -141,8 +141,8 @@ namespace dlib ...@@ -141,8 +141,8 @@ namespace dlib
ptr[3] = x[3]; ptr[3] = x[3];
} }
unsigned int size() const { return 4; } inline unsigned int size() const { return 4; }
float operator[](unsigned int idx) const { return x[idx]; } inline float operator[](unsigned int idx) const { return x[idx]; }
private: private:
float x[4]; float x[4];
...@@ -153,10 +153,10 @@ namespace dlib ...@@ -153,10 +153,10 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd4f_bool() {} inline simd4f_bool() {}
simd4f_bool(bool r0, bool r1, bool r2, bool r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;} inline simd4f_bool(bool r0, bool r1, bool r2, bool r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
bool operator[](unsigned int idx) const { return x[idx]; } inline bool operator[](unsigned int idx) const { return x[idx]; }
private: private:
bool x[4]; bool x[4];
}; };
......
...@@ -15,26 +15,26 @@ namespace dlib ...@@ -15,26 +15,26 @@ namespace dlib
public: public:
typedef int32 type; typedef int32 type;
simd4i() {} inline simd4i() {}
simd4i(int32 f) { x = _mm_set1_epi32(f); } inline simd4i(int32 f) { x = _mm_set1_epi32(f); }
simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x = _mm_setr_epi32(r0,r1,r2,r3); } inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x = _mm_setr_epi32(r0,r1,r2,r3); }
simd4i(const __m128i& val):x(val) {} inline simd4i(const __m128i& val):x(val) {}
simd4i& operator=(const __m128i& val) inline simd4i& operator=(const __m128i& val)
{ {
x = val; x = val;
return *this; return *this;
} }
operator __m128i() const { return x; } inline operator __m128i() const { return x; }
void load_aligned(const type* ptr) { x = _mm_load_si128((const __m128i*)ptr); } inline void load_aligned(const type* ptr) { x = _mm_load_si128((const __m128i*)ptr); }
void store_aligned(type* ptr) const { _mm_store_si128((__m128i*)ptr, x); } inline void store_aligned(type* ptr) const { _mm_store_si128((__m128i*)ptr, x); }
void load(const type* ptr) { x = _mm_loadu_si128((const __m128i*)ptr); } inline void load(const type* ptr) { x = _mm_loadu_si128((const __m128i*)ptr); }
void store(type* ptr) const { _mm_storeu_si128((__m128i*)ptr, x); } inline void store(type* ptr) const { _mm_storeu_si128((__m128i*)ptr, x); }
unsigned int size() const { return 4; } inline unsigned int size() const { return 4; }
int32 operator[](unsigned int idx) const inline int32 operator[](unsigned int idx) const
{ {
int32 temp[4]; int32 temp[4];
store(temp); store(temp);
...@@ -51,17 +51,17 @@ namespace dlib ...@@ -51,17 +51,17 @@ namespace dlib
public: public:
typedef int32 type; typedef int32 type;
simd4i() {} inline simd4i() {}
simd4i(int32 f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; } inline simd4i(int32 f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;} inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
struct rawarray struct rawarray
{ {
int32 a[4]; int32 a[4];
}; };
simd4i(const rawarray& a) { x[0]=a.a[0]; x[1]=a.a[1]; x[2]=a.a[2]; x[3]=a.a[3]; } inline simd4i(const rawarray& a) { x[0]=a.a[0]; x[1]=a.a[1]; x[2]=a.a[2]; x[3]=a.a[3]; }
void load_aligned(const type* ptr) inline void load_aligned(const type* ptr)
{ {
x[0] = ptr[0]; x[0] = ptr[0];
x[1] = ptr[1]; x[1] = ptr[1];
...@@ -69,7 +69,7 @@ namespace dlib ...@@ -69,7 +69,7 @@ namespace dlib
x[3] = ptr[3]; x[3] = ptr[3];
} }
void store_aligned(type* ptr) const inline void store_aligned(type* ptr) const
{ {
ptr[0] = x[0]; ptr[0] = x[0];
ptr[1] = x[1]; ptr[1] = x[1];
...@@ -77,7 +77,7 @@ namespace dlib ...@@ -77,7 +77,7 @@ namespace dlib
ptr[3] = x[3]; ptr[3] = x[3];
} }
void load(const type* ptr) inline void load(const type* ptr)
{ {
x[0] = ptr[0]; x[0] = ptr[0];
x[1] = ptr[1]; x[1] = ptr[1];
...@@ -85,7 +85,7 @@ namespace dlib ...@@ -85,7 +85,7 @@ namespace dlib
x[3] = ptr[3]; x[3] = ptr[3];
} }
void store(type* ptr) const inline void store(type* ptr) const
{ {
ptr[0] = x[0]; ptr[0] = x[0];
ptr[1] = x[1]; ptr[1] = x[1];
...@@ -93,8 +93,8 @@ namespace dlib ...@@ -93,8 +93,8 @@ namespace dlib
ptr[3] = x[3]; ptr[3] = x[3];
} }
unsigned int size() const { return 4; } inline unsigned int size() const { return 4; }
int32 operator[](unsigned int idx) const { return x[idx]; } inline int32 operator[](unsigned int idx) const { return x[idx]; }
private: private:
int32 x[4]; int32 x[4];
......
...@@ -16,18 +16,18 @@ namespace dlib ...@@ -16,18 +16,18 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd8f() {} inline simd8f() {}
simd8f(const simd4f& low, const simd4f& high) inline simd8f(const simd4f& low, const simd4f& high)
{ {
x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1); x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1);
} }
simd8f(float f) { x = _mm256_set1_ps(f); } inline simd8f(float f) { x = _mm256_set1_ps(f); }
inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7)
{ x = _mm256_setr_ps(r0,r1,r2,r3,r4,r5,r6,r7); } { x = _mm256_setr_ps(r0,r1,r2,r3,r4,r5,r6,r7); }
simd8f(const simd8i& val):x(_mm256_cvtepi32_ps(val)) {} inline simd8f(const simd8i& val):x(_mm256_cvtepi32_ps(val)) {}
simd8f(const __m256& val):x(val) {} inline simd8f(const __m256& val):x(val) {}
simd8f& operator=(const __m256& val) inline simd8f& operator=(const __m256& val)
{ {
x = val; x = val;
return *this; return *this;
...@@ -35,23 +35,23 @@ namespace dlib ...@@ -35,23 +35,23 @@ namespace dlib
inline operator __m256() const { return x; } inline operator __m256() const { return x; }
// truncate to 32bit integers // truncate to 32bit integers
operator __m256i() const { return _mm256_cvttps_epi32(x); } inline operator __m256i() const { return _mm256_cvttps_epi32(x); }
void load_aligned(const type* ptr) { x = _mm256_load_ps(ptr); } inline void load_aligned(const type* ptr) { x = _mm256_load_ps(ptr); }
void store_aligned(type* ptr) const { _mm256_store_ps(ptr, x); } inline void store_aligned(type* ptr) const { _mm256_store_ps(ptr, x); }
void load(const type* ptr) { x = _mm256_loadu_ps(ptr); } inline void load(const type* ptr) { x = _mm256_loadu_ps(ptr); }
void store(type* ptr) const { _mm256_storeu_ps(ptr, x); } inline void store(type* ptr) const { _mm256_storeu_ps(ptr, x); }
unsigned int size() const { return 8; } inline unsigned int size() const { return 8; }
float operator[](unsigned int idx) const inline float operator[](unsigned int idx) const
{ {
float temp[8]; float temp[8];
store(temp); store(temp);
return temp[idx]; return temp[idx];
} }
simd4f low() const { return _mm256_castps256_ps128(x); } inline simd4f low() const { return _mm256_castps256_ps128(x); }
simd4f high() const { return _mm256_extractf128_ps(x,1); } inline simd4f high() const { return _mm256_extractf128_ps(x,1); }
private: private:
__m256 x; __m256 x;
...@@ -63,20 +63,20 @@ namespace dlib ...@@ -63,20 +63,20 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd8f_bool() {} inline simd8f_bool() {}
simd8f_bool(const __m256& val):x(val) {} inline simd8f_bool(const __m256& val):x(val) {}
simd8f_bool(const simd4f_bool& low, const simd4f_bool& high) inline simd8f_bool(const simd4f_bool& low, const simd4f_bool& high)
{ {
x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1); x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1);
} }
simd8f_bool& operator=(const __m256& val) inline simd8f_bool& operator=(const __m256& val)
{ {
x = val; x = val;
return *this; return *this;
} }
operator __m256() const { return x; } inline operator __m256() const { return x; }
private: private:
...@@ -89,15 +89,15 @@ namespace dlib ...@@ -89,15 +89,15 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd8f() {} inline simd8f() {}
simd8f(const simd4f& low_, const simd4f& high_): _low(low_),_high(high_){} inline simd8f(const simd4f& low_, const simd4f& high_): _low(low_),_high(high_){}
simd8f(float f) :_low(f),_high(f) {} inline simd8f(float f) :_low(f),_high(f) {}
simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) : inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) :
_low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {} _low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {}
simd8f(const simd8i& val) : _low(val.low()), _high(val.high()) { } inline simd8f(const simd8i& val) : _low(val.low()), _high(val.high()) { }
// truncate to 32bit integers // truncate to 32bit integers
operator simd8i::rawarray() const inline operator simd8i::rawarray() const
{ {
simd8i::rawarray temp; simd8i::rawarray temp;
temp.low = simd4i(_low); temp.low = simd4i(_low);
...@@ -105,13 +105,13 @@ namespace dlib ...@@ -105,13 +105,13 @@ namespace dlib
return temp; return temp;
} }
void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); } inline void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); } inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); } inline void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); }
void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); } inline void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); }
unsigned int size() const { return 8; } inline unsigned int size() const { return 8; }
float operator[](unsigned int idx) const inline float operator[](unsigned int idx) const
{ {
if (idx < 4) if (idx < 4)
return _low[idx]; return _low[idx];
...@@ -119,8 +119,8 @@ namespace dlib ...@@ -119,8 +119,8 @@ namespace dlib
return _high[idx-4]; return _high[idx-4];
} }
simd4f low() const { return _low; } inline simd4f low() const { return _low; }
simd4f high() const { return _high; } inline simd4f high() const { return _high; }
private: private:
simd4f _low, _high; simd4f _low, _high;
...@@ -131,12 +131,12 @@ namespace dlib ...@@ -131,12 +131,12 @@ namespace dlib
public: public:
typedef float type; typedef float type;
simd8f_bool() {} inline simd8f_bool() {}
simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_): _low(low_),_high(high_){} inline simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_): _low(low_),_high(high_){}
simd4f_bool low() const { return _low; } inline simd4f_bool low() const { return _low; }
simd4f_bool high() const { return _high; } inline simd4f_bool high() const { return _high; }
private: private:
simd4f_bool _low,_high; simd4f_bool _low,_high;
}; };
......
...@@ -15,37 +15,37 @@ namespace dlib ...@@ -15,37 +15,37 @@ namespace dlib
public: public:
typedef int32 type; typedef int32 type;
simd8i() {} inline simd8i() {}
simd8i(int32 f) { x = _mm256_set1_epi32(f); } inline simd8i(int32 f) { x = _mm256_set1_epi32(f); }
simd8i(int32 r0, int32 r1, int32 r2, int32 r3, inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
int32 r4, int32 r5, int32 r6, int32 r7 ) int32 r4, int32 r5, int32 r6, int32 r7 )
{ x = _mm256_setr_epi32(r0,r1,r2,r3,r4,r5,r6,r7); } { x = _mm256_setr_epi32(r0,r1,r2,r3,r4,r5,r6,r7); }
simd8i(const __m256i& val):x(val) {} inline simd8i(const __m256i& val):x(val) {}
simd8i(const simd4i& low, const simd4i& high) inline simd8i(const simd4i& low, const simd4i& high)
{ {
x = _mm256_insertf128_si256(_mm256_castsi128_si256(low),high,1); x = _mm256_insertf128_si256(_mm256_castsi128_si256(low),high,1);
} }
simd8i& operator=(const __m256i& val) inline simd8i& operator=(const __m256i& val)
{ {
x = val; x = val;
return *this; return *this;
} }
operator __m256i() const { return x; } inline operator __m256i() const { return x; }
void load_aligned(const type* ptr) { x = _mm256_load_si256((const __m256i*)ptr); } inline void load_aligned(const type* ptr) { x = _mm256_load_si256((const __m256i*)ptr); }
void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); } inline void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); }
void load(const type* ptr) { x = _mm256_loadu_si256((const __m256i*)ptr); } inline void load(const type* ptr) { x = _mm256_loadu_si256((const __m256i*)ptr); }
void store(type* ptr) const { _mm256_storeu_si256((__m256i*)ptr, x); } inline void store(type* ptr) const { _mm256_storeu_si256((__m256i*)ptr, x); }
simd4i low() const { return _mm256_castsi256_si128(x); } inline simd4i low() const { return _mm256_castsi256_si128(x); }
simd4i high() const { return _mm256_extractf128_si256(x,1); } inline simd4i high() const { return _mm256_extractf128_si256(x,1); }
unsigned int size() const { return 4; } inline unsigned int size() const { return 4; }
int32 operator[](unsigned int idx) const inline int32 operator[](unsigned int idx) const
{ {
int32 temp[8]; int32 temp[8];
store(temp); store(temp);
...@@ -61,29 +61,29 @@ namespace dlib ...@@ -61,29 +61,29 @@ namespace dlib
public: public:
typedef int32 type; typedef int32 type;
simd8i() {} inline simd8i() {}
simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){} inline simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){}
simd8i(int32 f) :_low(f),_high(f) {} inline simd8i(int32 f) :_low(f),_high(f) {}
simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) : inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) :
_low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {} _low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {}
struct rawarray struct rawarray
{ {
simd4i low, high; simd4i low, high;
}; };
simd8i(const rawarray& a) inline simd8i(const rawarray& a)
{ {
_low = a.low; _low = a.low;
_high = a.high; _high = a.high;
} }
void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); } inline void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); } inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); } inline void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); }
void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); } inline void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); }
unsigned int size() const { return 8; } inline unsigned int size() const { return 8; }
int32 operator[](unsigned int idx) const inline int32 operator[](unsigned int idx) const
{ {
if (idx < 4) if (idx < 4)
return _low[idx]; return _low[idx];
...@@ -91,8 +91,8 @@ namespace dlib ...@@ -91,8 +91,8 @@ namespace dlib
return _high[idx-4]; return _high[idx-4];
} }
simd4i low() const { return _low; } inline simd4i low() const { return _low; }
simd4i high() const { return _high; } inline simd4i high() const { return _high; }
private: private:
simd4i _low, _high; simd4i _low, _high;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment