Commit 7e7943cd authored by Davis King

Made all the SIMD functions explicitly inline because otherwise Visual Studio 2010 won't inline them.
parent f2cc77aa
......@@ -17,11 +17,11 @@ namespace dlib
public:
typedef float type;
simd4f() {}
simd4f(float f) { x = _mm_set1_ps(f); }
simd4f(float r0, float r1, float r2, float r3) { x = _mm_setr_ps(r0,r1,r2,r3); }
simd4f(const __m128& val):x(val) {}
simd4f(const simd4i& val):x(_mm_cvtepi32_ps(val)) {}
inline simd4f() {}
inline simd4f(float f) { x = _mm_set1_ps(f); }
inline simd4f(float r0, float r1, float r2, float r3) { x = _mm_setr_ps(r0,r1,r2,r3); }
inline simd4f(const __m128& val):x(val) {}
inline simd4f(const simd4i& val):x(_mm_cvtepi32_ps(val)) {}
inline simd4f& operator=(const simd4i& val)
{
......@@ -29,24 +29,24 @@ namespace dlib
return *this;
}
simd4f& operator=(const __m128& val)
inline simd4f& operator=(const __m128& val)
{
x = val;
return *this;
}
operator __m128() const { return x; }
inline operator __m128() const { return x; }
// truncate to 32bit integers
operator __m128i() const { return _mm_cvttps_epi32(x); }
inline operator __m128i() const { return _mm_cvttps_epi32(x); }
void load_aligned(const type* ptr) { x = _mm_load_ps(ptr); }
void store_aligned(type* ptr) const { _mm_store_ps(ptr, x); }
void load(const type* ptr) { x = _mm_loadu_ps(ptr); }
void store(type* ptr) const { _mm_storeu_ps(ptr, x); }
inline void load_aligned(const type* ptr) { x = _mm_load_ps(ptr); }
inline void store_aligned(type* ptr) const { _mm_store_ps(ptr, x); }
inline void load(const type* ptr) { x = _mm_loadu_ps(ptr); }
inline void store(type* ptr) const { _mm_storeu_ps(ptr, x); }
unsigned int size() const { return 4; }
float operator[](unsigned int idx) const
inline unsigned int size() const { return 4; }
inline float operator[](unsigned int idx) const
{
float temp[4];
store(temp);
......@@ -62,16 +62,16 @@ namespace dlib
public:
typedef float type;
simd4f_bool() {}
simd4f_bool(const __m128& val):x(val) {}
inline simd4f_bool() {}
inline simd4f_bool(const __m128& val):x(val) {}
simd4f_bool& operator=(const __m128& val)
inline simd4f_bool& operator=(const __m128& val)
{
x = val;
return *this;
}
operator __m128() const { return x; }
inline operator __m128() const { return x; }
private:
......@@ -83,13 +83,13 @@ namespace dlib
public:
typedef float type;
simd4f() {}
simd4f(float f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
simd4f(float r0, float r1, float r2, float r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
simd4f(const simd4i& val) { x[0]=val[0]; x[1]=val[1]; x[2]=val[2]; x[3]=val[3];}
inline simd4f() {}
inline simd4f(float f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
inline simd4f(float r0, float r1, float r2, float r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
inline simd4f(const simd4i& val) { x[0]=val[0]; x[1]=val[1]; x[2]=val[2]; x[3]=val[3];}
// truncate to 32bit integers
operator simd4i::rawarray() const
inline operator simd4i::rawarray() const
{
simd4i::rawarray temp;
temp.a[0] = (int32)x[0];
......@@ -109,7 +109,7 @@ namespace dlib
}
void load_aligned(const type* ptr)
inline void load_aligned(const type* ptr)
{
x[0] = ptr[0];
x[1] = ptr[1];
......@@ -117,7 +117,7 @@ namespace dlib
x[3] = ptr[3];
}
void store_aligned(type* ptr) const
inline void store_aligned(type* ptr) const
{
ptr[0] = x[0];
ptr[1] = x[1];
......@@ -125,7 +125,7 @@ namespace dlib
ptr[3] = x[3];
}
void load(const type* ptr)
inline void load(const type* ptr)
{
x[0] = ptr[0];
x[1] = ptr[1];
......@@ -133,7 +133,7 @@ namespace dlib
x[3] = ptr[3];
}
void store(type* ptr) const
inline void store(type* ptr) const
{
ptr[0] = x[0];
ptr[1] = x[1];
......@@ -141,8 +141,8 @@ namespace dlib
ptr[3] = x[3];
}
unsigned int size() const { return 4; }
float operator[](unsigned int idx) const { return x[idx]; }
inline unsigned int size() const { return 4; }
inline float operator[](unsigned int idx) const { return x[idx]; }
private:
float x[4];
......@@ -153,10 +153,10 @@ namespace dlib
public:
typedef float type;
simd4f_bool() {}
simd4f_bool(bool r0, bool r1, bool r2, bool r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
inline simd4f_bool() {}
inline simd4f_bool(bool r0, bool r1, bool r2, bool r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
bool operator[](unsigned int idx) const { return x[idx]; }
inline bool operator[](unsigned int idx) const { return x[idx]; }
private:
bool x[4];
};
......
......@@ -15,26 +15,26 @@ namespace dlib
public:
typedef int32 type;
simd4i() {}
simd4i(int32 f) { x = _mm_set1_epi32(f); }
simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x = _mm_setr_epi32(r0,r1,r2,r3); }
simd4i(const __m128i& val):x(val) {}
inline simd4i() {}
inline simd4i(int32 f) { x = _mm_set1_epi32(f); }
inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x = _mm_setr_epi32(r0,r1,r2,r3); }
inline simd4i(const __m128i& val):x(val) {}
simd4i& operator=(const __m128i& val)
inline simd4i& operator=(const __m128i& val)
{
x = val;
return *this;
}
operator __m128i() const { return x; }
inline operator __m128i() const { return x; }
void load_aligned(const type* ptr) { x = _mm_load_si128((const __m128i*)ptr); }
void store_aligned(type* ptr) const { _mm_store_si128((__m128i*)ptr, x); }
void load(const type* ptr) { x = _mm_loadu_si128((const __m128i*)ptr); }
void store(type* ptr) const { _mm_storeu_si128((__m128i*)ptr, x); }
inline void load_aligned(const type* ptr) { x = _mm_load_si128((const __m128i*)ptr); }
inline void store_aligned(type* ptr) const { _mm_store_si128((__m128i*)ptr, x); }
inline void load(const type* ptr) { x = _mm_loadu_si128((const __m128i*)ptr); }
inline void store(type* ptr) const { _mm_storeu_si128((__m128i*)ptr, x); }
unsigned int size() const { return 4; }
int32 operator[](unsigned int idx) const
inline unsigned int size() const { return 4; }
inline int32 operator[](unsigned int idx) const
{
int32 temp[4];
store(temp);
......@@ -51,17 +51,17 @@ namespace dlib
public:
typedef int32 type;
simd4i() {}
simd4i(int32 f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
inline simd4i() {}
inline simd4i(int32 f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
struct rawarray
{
int32 a[4];
};
simd4i(const rawarray& a) { x[0]=a.a[0]; x[1]=a.a[1]; x[2]=a.a[2]; x[3]=a.a[3]; }
inline simd4i(const rawarray& a) { x[0]=a.a[0]; x[1]=a.a[1]; x[2]=a.a[2]; x[3]=a.a[3]; }
void load_aligned(const type* ptr)
inline void load_aligned(const type* ptr)
{
x[0] = ptr[0];
x[1] = ptr[1];
......@@ -69,7 +69,7 @@ namespace dlib
x[3] = ptr[3];
}
void store_aligned(type* ptr) const
inline void store_aligned(type* ptr) const
{
ptr[0] = x[0];
ptr[1] = x[1];
......@@ -77,7 +77,7 @@ namespace dlib
ptr[3] = x[3];
}
void load(const type* ptr)
inline void load(const type* ptr)
{
x[0] = ptr[0];
x[1] = ptr[1];
......@@ -85,7 +85,7 @@ namespace dlib
x[3] = ptr[3];
}
void store(type* ptr) const
inline void store(type* ptr) const
{
ptr[0] = x[0];
ptr[1] = x[1];
......@@ -93,8 +93,8 @@ namespace dlib
ptr[3] = x[3];
}
unsigned int size() const { return 4; }
int32 operator[](unsigned int idx) const { return x[idx]; }
inline unsigned int size() const { return 4; }
inline int32 operator[](unsigned int idx) const { return x[idx]; }
private:
int32 x[4];
......
......@@ -16,18 +16,18 @@ namespace dlib
public:
typedef float type;
simd8f() {}
simd8f(const simd4f& low, const simd4f& high)
inline simd8f() {}
inline simd8f(const simd4f& low, const simd4f& high)
{
x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1);
}
simd8f(float f) { x = _mm256_set1_ps(f); }
inline simd8f(float f) { x = _mm256_set1_ps(f); }
inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7)
{ x = _mm256_setr_ps(r0,r1,r2,r3,r4,r5,r6,r7); }
simd8f(const simd8i& val):x(_mm256_cvtepi32_ps(val)) {}
simd8f(const __m256& val):x(val) {}
simd8f& operator=(const __m256& val)
inline simd8f(const simd8i& val):x(_mm256_cvtepi32_ps(val)) {}
inline simd8f(const __m256& val):x(val) {}
inline simd8f& operator=(const __m256& val)
{
x = val;
return *this;
......@@ -35,23 +35,23 @@ namespace dlib
inline operator __m256() const { return x; }
// truncate to 32bit integers
operator __m256i() const { return _mm256_cvttps_epi32(x); }
inline operator __m256i() const { return _mm256_cvttps_epi32(x); }
void load_aligned(const type* ptr) { x = _mm256_load_ps(ptr); }
void store_aligned(type* ptr) const { _mm256_store_ps(ptr, x); }
void load(const type* ptr) { x = _mm256_loadu_ps(ptr); }
void store(type* ptr) const { _mm256_storeu_ps(ptr, x); }
inline void load_aligned(const type* ptr) { x = _mm256_load_ps(ptr); }
inline void store_aligned(type* ptr) const { _mm256_store_ps(ptr, x); }
inline void load(const type* ptr) { x = _mm256_loadu_ps(ptr); }
inline void store(type* ptr) const { _mm256_storeu_ps(ptr, x); }
unsigned int size() const { return 8; }
float operator[](unsigned int idx) const
inline unsigned int size() const { return 8; }
inline float operator[](unsigned int idx) const
{
float temp[8];
store(temp);
return temp[idx];
}
simd4f low() const { return _mm256_castps256_ps128(x); }
simd4f high() const { return _mm256_extractf128_ps(x,1); }
inline simd4f low() const { return _mm256_castps256_ps128(x); }
inline simd4f high() const { return _mm256_extractf128_ps(x,1); }
private:
__m256 x;
......@@ -63,20 +63,20 @@ namespace dlib
public:
typedef float type;
simd8f_bool() {}
simd8f_bool(const __m256& val):x(val) {}
simd8f_bool(const simd4f_bool& low, const simd4f_bool& high)
inline simd8f_bool() {}
inline simd8f_bool(const __m256& val):x(val) {}
inline simd8f_bool(const simd4f_bool& low, const simd4f_bool& high)
{
x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1);
}
simd8f_bool& operator=(const __m256& val)
inline simd8f_bool& operator=(const __m256& val)
{
x = val;
return *this;
}
operator __m256() const { return x; }
inline operator __m256() const { return x; }
private:
......@@ -89,15 +89,15 @@ namespace dlib
public:
typedef float type;
simd8f() {}
simd8f(const simd4f& low_, const simd4f& high_): _low(low_),_high(high_){}
simd8f(float f) :_low(f),_high(f) {}
simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) :
inline simd8f() {}
inline simd8f(const simd4f& low_, const simd4f& high_): _low(low_),_high(high_){}
inline simd8f(float f) :_low(f),_high(f) {}
inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) :
_low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {}
simd8f(const simd8i& val) : _low(val.low()), _high(val.high()) { }
inline simd8f(const simd8i& val) : _low(val.low()), _high(val.high()) { }
// truncate to 32bit integers
operator simd8i::rawarray() const
inline operator simd8i::rawarray() const
{
simd8i::rawarray temp;
temp.low = simd4i(_low);
......@@ -105,13 +105,13 @@ namespace dlib
return temp;
}
void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); }
void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); }
inline void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
inline void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); }
inline void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); }
unsigned int size() const { return 8; }
float operator[](unsigned int idx) const
inline unsigned int size() const { return 8; }
inline float operator[](unsigned int idx) const
{
if (idx < 4)
return _low[idx];
......@@ -119,8 +119,8 @@ namespace dlib
return _high[idx-4];
}
simd4f low() const { return _low; }
simd4f high() const { return _high; }
inline simd4f low() const { return _low; }
inline simd4f high() const { return _high; }
private:
simd4f _low, _high;
......@@ -131,12 +131,12 @@ namespace dlib
public:
typedef float type;
simd8f_bool() {}
simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_): _low(low_),_high(high_){}
inline simd8f_bool() {}
inline simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_): _low(low_),_high(high_){}
simd4f_bool low() const { return _low; }
simd4f_bool high() const { return _high; }
inline simd4f_bool low() const { return _low; }
inline simd4f_bool high() const { return _high; }
private:
simd4f_bool _low,_high;
};
......
......@@ -15,37 +15,37 @@ namespace dlib
public:
typedef int32 type;
simd8i() {}
simd8i(int32 f) { x = _mm256_set1_epi32(f); }
simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
inline simd8i() {}
inline simd8i(int32 f) { x = _mm256_set1_epi32(f); }
inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
int32 r4, int32 r5, int32 r6, int32 r7 )
{ x = _mm256_setr_epi32(r0,r1,r2,r3,r4,r5,r6,r7); }
simd8i(const __m256i& val):x(val) {}
inline simd8i(const __m256i& val):x(val) {}
simd8i(const simd4i& low, const simd4i& high)
inline simd8i(const simd4i& low, const simd4i& high)
{
x = _mm256_insertf128_si256(_mm256_castsi128_si256(low),high,1);
}
simd8i& operator=(const __m256i& val)
inline simd8i& operator=(const __m256i& val)
{
x = val;
return *this;
}
operator __m256i() const { return x; }
inline operator __m256i() const { return x; }
void load_aligned(const type* ptr) { x = _mm256_load_si256((const __m256i*)ptr); }
void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); }
void load(const type* ptr) { x = _mm256_loadu_si256((const __m256i*)ptr); }
void store(type* ptr) const { _mm256_storeu_si256((__m256i*)ptr, x); }
inline void load_aligned(const type* ptr) { x = _mm256_load_si256((const __m256i*)ptr); }
inline void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); }
inline void load(const type* ptr) { x = _mm256_loadu_si256((const __m256i*)ptr); }
inline void store(type* ptr) const { _mm256_storeu_si256((__m256i*)ptr, x); }
simd4i low() const { return _mm256_castsi256_si128(x); }
simd4i high() const { return _mm256_extractf128_si256(x,1); }
inline simd4i low() const { return _mm256_castsi256_si128(x); }
inline simd4i high() const { return _mm256_extractf128_si256(x,1); }
// Number of int32 lanes in this vector. The type is constructed from eight
// values and operator[] unpacks eight elements, so the lane count is 8 (not 4).
unsigned int size() const { return 8; }
int32 operator[](unsigned int idx) const
// Number of int32 lanes in this vector. The type is constructed from eight
// values and operator[] unpacks eight elements, so the lane count is 8 (not 4).
inline unsigned int size() const { return 8; }
inline int32 operator[](unsigned int idx) const
{
int32 temp[8];
store(temp);
......@@ -61,29 +61,29 @@ namespace dlib
public:
typedef int32 type;
simd8i() {}
simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){}
simd8i(int32 f) :_low(f),_high(f) {}
simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) :
inline simd8i() {}
inline simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){}
inline simd8i(int32 f) :_low(f),_high(f) {}
inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) :
_low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {}
struct rawarray
{
simd4i low, high;
};
simd8i(const rawarray& a)
inline simd8i(const rawarray& a)
{
_low = a.low;
_high = a.high;
}
void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); }
void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); }
inline void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
inline void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); }
inline void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); }
unsigned int size() const { return 8; }
int32 operator[](unsigned int idx) const
inline unsigned int size() const { return 8; }
inline int32 operator[](unsigned int idx) const
{
if (idx < 4)
return _low[idx];
......@@ -91,8 +91,8 @@ namespace dlib
return _high[idx-4];
}
simd4i low() const { return _low; }
simd4i high() const { return _high; }
inline simd4i low() const { return _low; }
inline simd4i high() const { return _high; }
private:
simd4i _low, _high;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment