Made all the simd functions explicitly inline because otherwise Visual Studio 2010 won't inline them.

Made all the simd functions explicitly inline because otherwise Visual Studio 2010 won't inline them.
7e7943cd · Davis King · f2cc77aa · 7e7943cd · 7e7943cd · 7e7943cd
Commit 7e7943cd authored Dec 01, 2013 by Davis King
Show whitespace changes
Inline Side-by-side

Showing with 120 additions and 120 deletions

simd4f.h dlib/simd/simd4f.h +32 -32

simd4i.h dlib/simd/simd4i.h +22 -22

simd8f.h dlib/simd/simd8f.h +38 -38

simd8i.h dlib/simd/simd8i.h +28 -28

No files found.
--- a/dlib/simd/simd4f.h
+++ b/dlib/simd/simd4f.h
@@ -17,11 +17,11 @@ namespace dlib
    public:
        typedef float type;
-        simd4f() {}
+        inline simd4f() {}
-        simd4f(float f) { x = _mm_set1_ps(f); }
+        inline simd4f(float f) { x = _mm_set1_ps(f); }
-        simd4f(float r0, float r1, float r2, float r3) { x = _mm_setr_ps(r0,r1,r2,r3); }
+        inline simd4f(float r0, float r1, float r2, float r3) { x = _mm_setr_ps(r0,r1,r2,r3); }
-        simd4f(const __m128& val):x(val) {}
+        inline simd4f(const __m128& val):x(val) {}
-        simd4f(const simd4i& val):x(_mm_cvtepi32_ps(val)) {}
+        inline simd4f(const simd4i& val):x(_mm_cvtepi32_ps(val)) {}
        inline simd4f& operator=(const simd4i& val)
        {
@@ -29,24 +29,24 @@ namespace dlib
            return *this;
        }
-        simd4f& operator=(const __m128& val)
+        inline simd4f& operator=(const __m128& val)
        {
            x = val;
            return *this;
        }
-        operator __m128() const { return x; }
+        inline operator __m128() const { return x; }
        // truncate to 32bit integers
-        operator __m128i() const { return _mm_cvttps_epi32(x); }
+        inline operator __m128i() const { return _mm_cvttps_epi32(x); }
-        void load_aligned(const type* ptr)  { x = _mm_load_ps(ptr); }
+        inline void load_aligned(const type* ptr)  { x = _mm_load_ps(ptr); }
-        void store_aligned(type* ptr) const { _mm_store_ps(ptr, x); }
+        inline void store_aligned(type* ptr) const { _mm_store_ps(ptr, x); }
-        void load(const type* ptr)          { x = _mm_loadu_ps(ptr); }
+        inline void load(const type* ptr)          { x = _mm_loadu_ps(ptr); }
-        void store(type* ptr)         const { _mm_storeu_ps(ptr, x); }
+        inline void store(type* ptr)         const { _mm_storeu_ps(ptr, x); }
-        unsigned int size() const { return 4; }
+        inline unsigned int size() const { return 4; }
-        float operator[](unsigned int idx) const 
+        inline float operator[](unsigned int idx) const 
        {
            float temp[4];
            store(temp);
@@ -62,16 +62,16 @@ namespace dlib
    public:
        typedef float type;
-        simd4f_bool() {}
+        inline simd4f_bool() {}
-        simd4f_bool(const __m128& val):x(val) {}
+        inline simd4f_bool(const __m128& val):x(val) {}
-        simd4f_bool& operator=(const __m128& val)
+        inline simd4f_bool& operator=(const __m128& val)
        {
            x = val;
            return *this;
        }
-        operator __m128() const { return x; }
+        inline operator __m128() const { return x; }
    private:
@@ -83,13 +83,13 @@ namespace dlib
    public:
        typedef float type;
-        simd4f() {}
+        inline simd4f() {}
-        simd4f(float f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
+        inline simd4f(float f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
-        simd4f(float r0, float r1, float r2, float r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
+        inline simd4f(float r0, float r1, float r2, float r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
-        simd4f(const simd4i& val) { x[0]=val[0]; x[1]=val[1]; x[2]=val[2]; x[3]=val[3];}
+        inline simd4f(const simd4i& val) { x[0]=val[0]; x[1]=val[1]; x[2]=val[2]; x[3]=val[3];}
        // truncate to 32bit integers
-        operator simd4i::rawarray() const 
+        inline operator simd4i::rawarray() const 
        { 
            simd4i::rawarray temp;
            temp.a[0] = (int32)x[0];
@@ -109,7 +109,7 @@ namespace dlib
        }
-        void load_aligned(const type* ptr)
+        inline void load_aligned(const type* ptr)
        {
            x[0] = ptr[0];
            x[1] = ptr[1];
@@ -117,7 +117,7 @@ namespace dlib
            x[3] = ptr[3];
        }
-        void store_aligned(type* ptr) const
+        inline void store_aligned(type* ptr) const
        {
            ptr[0] = x[0];
            ptr[1] = x[1];
@@ -125,7 +125,7 @@ namespace dlib
            ptr[3] = x[3];
        }
-        void load(const type* ptr)
+        inline void load(const type* ptr)
        {
            x[0] = ptr[0];
            x[1] = ptr[1];
@@ -133,7 +133,7 @@ namespace dlib
            x[3] = ptr[3];
        }
-        void store(type* ptr) const
+        inline void store(type* ptr) const
        {
            ptr[0] = x[0];
            ptr[1] = x[1];
@@ -141,8 +141,8 @@ namespace dlib
            ptr[3] = x[3];
        }
-        unsigned int size() const { return 4; }
+        inline unsigned int size() const { return 4; }
-        float operator[](unsigned int idx) const { return x[idx]; }
+        inline float operator[](unsigned int idx) const { return x[idx]; }
    private:
        float x[4];
@@ -153,10 +153,10 @@ namespace dlib
    public:
        typedef float type;
-        simd4f_bool() {}
+        inline simd4f_bool() {}
-        simd4f_bool(bool r0, bool r1, bool r2, bool r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
+        inline simd4f_bool(bool r0, bool r1, bool r2, bool r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
-        bool operator[](unsigned int idx) const { return x[idx]; }
+        inline bool operator[](unsigned int idx) const { return x[idx]; }
    private:
        bool x[4];
    };

--- a/dlib/simd/simd4i.h
+++ b/dlib/simd/simd4i.h
@@ -15,26 +15,26 @@ namespace dlib
    public:
        typedef int32 type;
-        simd4i() {}
+        inline simd4i() {}
-        simd4i(int32 f) { x = _mm_set1_epi32(f); }
+        inline simd4i(int32 f) { x = _mm_set1_epi32(f); }
-        simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x = _mm_setr_epi32(r0,r1,r2,r3); }
+        inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x = _mm_setr_epi32(r0,r1,r2,r3); }
-        simd4i(const __m128i& val):x(val) {}
+        inline simd4i(const __m128i& val):x(val) {}
-        simd4i& operator=(const __m128i& val)
+        inline simd4i& operator=(const __m128i& val)
        {
            x = val;
            return *this;
        }
-        operator __m128i() const { return x; }
+        inline operator __m128i() const { return x; }
-        void load_aligned(const type* ptr)  { x = _mm_load_si128((const __m128i*)ptr); }
+        inline void load_aligned(const type* ptr)  { x = _mm_load_si128((const __m128i*)ptr); }
-        void store_aligned(type* ptr) const { _mm_store_si128((__m128i*)ptr, x); }
+        inline void store_aligned(type* ptr) const { _mm_store_si128((__m128i*)ptr, x); }
-        void load(const type* ptr)          { x = _mm_loadu_si128((const __m128i*)ptr); }
+        inline void load(const type* ptr)          { x = _mm_loadu_si128((const __m128i*)ptr); }
-        void store(type* ptr)         const { _mm_storeu_si128((__m128i*)ptr, x); }
+        inline void store(type* ptr)         const { _mm_storeu_si128((__m128i*)ptr, x); }
-        unsigned int size() const { return 4; }
+        inline unsigned int size() const { return 4; }
-        int32 operator[](unsigned int idx) const 
+        inline int32 operator[](unsigned int idx) const 
        {
            int32 temp[4];
            store(temp);
@@ -51,17 +51,17 @@ namespace dlib
    public:
        typedef int32 type;
-        simd4i() {}
+        inline simd4i() {}
-        simd4i(int32 f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
+        inline simd4i(int32 f) { x[0]=f; x[1]=f; x[2]=f; x[3]=f; }
-        simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
+        inline simd4i(int32 r0, int32 r1, int32 r2, int32 r3) { x[0]=r0; x[1]=r1; x[2]=r2; x[3]=r3;}
        struct rawarray
        {
            int32 a[4];
        };
-        simd4i(const rawarray& a) { x[0]=a.a[0]; x[1]=a.a[1]; x[2]=a.a[2]; x[3]=a.a[3]; }
+        inline simd4i(const rawarray& a) { x[0]=a.a[0]; x[1]=a.a[1]; x[2]=a.a[2]; x[3]=a.a[3]; }
-        void load_aligned(const type* ptr)
+        inline void load_aligned(const type* ptr)
        {
            x[0] = ptr[0];
            x[1] = ptr[1];
@@ -69,7 +69,7 @@ namespace dlib
            x[3] = ptr[3];
        }
-        void store_aligned(type* ptr) const
+        inline void store_aligned(type* ptr) const
        {
            ptr[0] = x[0];
            ptr[1] = x[1];
@@ -77,7 +77,7 @@ namespace dlib
            ptr[3] = x[3];
        }
-        void load(const type* ptr)
+        inline void load(const type* ptr)
        {
            x[0] = ptr[0];
            x[1] = ptr[1];
@@ -85,7 +85,7 @@ namespace dlib
            x[3] = ptr[3];
        }
-        void store(type* ptr) const
+        inline void store(type* ptr) const
        {
            ptr[0] = x[0];
            ptr[1] = x[1];
@@ -93,8 +93,8 @@ namespace dlib
            ptr[3] = x[3];
        }
-        unsigned int size() const { return 4; }
+        inline unsigned int size() const { return 4; }
-        int32 operator[](unsigned int idx) const { return x[idx]; }
+        inline int32 operator[](unsigned int idx) const { return x[idx]; }
    private:
        int32 x[4];

--- a/dlib/simd/simd8f.h
+++ b/dlib/simd/simd8f.h
@@ -16,18 +16,18 @@ namespace dlib
    public:
        typedef float type;
-        simd8f() {}
+        inline simd8f() {}
-        simd8f(const simd4f& low, const simd4f& high)
+        inline simd8f(const simd4f& low, const simd4f& high)
        {
            x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1);
        }
-        simd8f(float f) { x = _mm256_set1_ps(f); }
+        inline simd8f(float f) { x = _mm256_set1_ps(f); }
        inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) 
        { x = _mm256_setr_ps(r0,r1,r2,r3,r4,r5,r6,r7); }
-        simd8f(const simd8i& val):x(_mm256_cvtepi32_ps(val)) {}
+        inline simd8f(const simd8i& val):x(_mm256_cvtepi32_ps(val)) {}
-        simd8f(const __m256& val):x(val) {}
+        inline simd8f(const __m256& val):x(val) {}
-        simd8f& operator=(const __m256& val)
+        inline simd8f& operator=(const __m256& val)
        {
            x = val;
            return *this;
@@ -35,23 +35,23 @@ namespace dlib
        inline operator __m256() const { return x; }
        // truncate to 32bit integers
-        operator __m256i() const { return _mm256_cvttps_epi32(x); }
+        inline operator __m256i() const { return _mm256_cvttps_epi32(x); }
-        void load_aligned(const type* ptr)  { x = _mm256_load_ps(ptr); }
+        inline void load_aligned(const type* ptr)  { x = _mm256_load_ps(ptr); }
-        void store_aligned(type* ptr) const { _mm256_store_ps(ptr, x); }
+        inline void store_aligned(type* ptr) const { _mm256_store_ps(ptr, x); }
-        void load(const type* ptr)          { x = _mm256_loadu_ps(ptr); }
+        inline void load(const type* ptr)          { x = _mm256_loadu_ps(ptr); }
-        void store(type* ptr)         const { _mm256_storeu_ps(ptr, x); }
+        inline void store(type* ptr)         const { _mm256_storeu_ps(ptr, x); }
-        unsigned int size() const { return 8; }
+        inline unsigned int size() const { return 8; }
-        float operator[](unsigned int idx) const 
+        inline float operator[](unsigned int idx) const 
        {
            float temp[8];
            store(temp);
            return temp[idx];
        }
-        simd4f low() const { return _mm256_castps256_ps128(x); }
+        inline simd4f low() const { return _mm256_castps256_ps128(x); }
-        simd4f high() const { return _mm256_extractf128_ps(x,1); }
+        inline simd4f high() const { return _mm256_extractf128_ps(x,1); }
    private:
        __m256 x;
@@ -63,20 +63,20 @@ namespace dlib
    public:
        typedef float type;
-        simd8f_bool() {}
+        inline simd8f_bool() {}
-        simd8f_bool(const __m256& val):x(val) {}
+        inline simd8f_bool(const __m256& val):x(val) {}
-        simd8f_bool(const simd4f_bool& low, const simd4f_bool& high)
+        inline simd8f_bool(const simd4f_bool& low, const simd4f_bool& high)
        {
            x = _mm256_insertf128_ps(_mm256_castps128_ps256(low),high,1);
        }
-        simd8f_bool& operator=(const __m256& val)
+        inline simd8f_bool& operator=(const __m256& val)
        {
            x = val;
            return *this;
        }
-        operator __m256() const { return x; }
+        inline operator __m256() const { return x; }
    private:
@@ -89,15 +89,15 @@ namespace dlib
    public:
        typedef float type;
-        simd8f() {}
+        inline simd8f() {}
-        simd8f(const simd4f& low_, const simd4f& high_): _low(low_),_high(high_){}
+        inline simd8f(const simd4f& low_, const simd4f& high_): _low(low_),_high(high_){}
-        simd8f(float f) :_low(f),_high(f) {}
+        inline simd8f(float f) :_low(f),_high(f) {}
-        simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) :
+        inline simd8f(float r0, float r1, float r2, float r3, float r4, float r5, float r6, float r7) :
            _low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {}
-        simd8f(const simd8i& val) : _low(val.low()), _high(val.high()) { }
+        inline simd8f(const simd8i& val) : _low(val.low()), _high(val.high()) { }
        // truncate to 32bit integers
-        operator simd8i::rawarray() const 
+        inline operator simd8i::rawarray() const 
        { 
            simd8i::rawarray temp;
            temp.low = simd4i(_low);
@@ -105,13 +105,13 @@ namespace dlib
            return temp;
        }
-        void load_aligned(const type* ptr)  { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
+        inline void load_aligned(const type* ptr)  { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
-        void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
+        inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
-        void load(const type* ptr)          { _low.load(ptr); _high.load(ptr+4); }
+        inline void load(const type* ptr)          { _low.load(ptr); _high.load(ptr+4); }
-        void store(type* ptr)         const { _low.store(ptr); _high.store(ptr+4); }
+        inline void store(type* ptr)         const { _low.store(ptr); _high.store(ptr+4); }
-        unsigned int size() const { return 8; }
+        inline unsigned int size() const { return 8; }
-        float operator[](unsigned int idx) const 
+        inline float operator[](unsigned int idx) const 
        {
            if (idx < 4)
                return _low[idx];
@@ -119,8 +119,8 @@ namespace dlib
                return _high[idx-4];
        }
-        simd4f low() const { return _low; }
+        inline simd4f low() const { return _low; }
-        simd4f high() const { return _high; }
+        inline simd4f high() const { return _high; }
    private:
        simd4f _low, _high;
@@ -131,12 +131,12 @@ namespace dlib
    public:
        typedef float type;
-        simd8f_bool() {}
+        inline simd8f_bool() {}
-        simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_): _low(low_),_high(high_){}
+        inline simd8f_bool(const simd4f_bool& low_, const simd4f_bool& high_): _low(low_),_high(high_){}
-        simd4f_bool low() const { return _low; }
+        inline simd4f_bool low() const { return _low; }
-        simd4f_bool high() const { return _high; }
+        inline simd4f_bool high() const { return _high; }
    private:
        simd4f_bool _low,_high;
    };

--- a/dlib/simd/simd8i.h
+++ b/dlib/simd/simd8i.h
@@ -15,37 +15,37 @@ namespace dlib
    public:
        typedef int32 type;
-        simd8i() {}
+        inline simd8i() {}
-        simd8i(int32 f) { x = _mm256_set1_epi32(f); }
+        inline simd8i(int32 f) { x = _mm256_set1_epi32(f); }
-        simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
+        inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3,
               int32 r4, int32 r5, int32 r6, int32 r7 ) 
        { x = _mm256_setr_epi32(r0,r1,r2,r3,r4,r5,r6,r7); }
-        simd8i(const __m256i& val):x(val) {}
+        inline simd8i(const __m256i& val):x(val) {}
-        simd8i(const simd4i& low, const simd4i& high)
+        inline simd8i(const simd4i& low, const simd4i& high)
        {
            x = _mm256_insertf128_si256(_mm256_castsi128_si256(low),high,1);
        }
-        simd8i& operator=(const __m256i& val)
+        inline simd8i& operator=(const __m256i& val)
        {
            x = val;
            return *this;
        }
-        operator __m256i() const { return x; }
+        inline operator __m256i() const { return x; }
-        void load_aligned(const type* ptr)  { x = _mm256_load_si256((const __m256i*)ptr); }
+        inline void load_aligned(const type* ptr)  { x = _mm256_load_si256((const __m256i*)ptr); }
-        void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); }
+        inline void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); }
-        void load(const type* ptr)          { x = _mm256_loadu_si256((const __m256i*)ptr); }
+        inline void load(const type* ptr)          { x = _mm256_loadu_si256((const __m256i*)ptr); }
-        void store(type* ptr)         const { _mm256_storeu_si256((__m256i*)ptr, x); }
+        inline void store(type* ptr)         const { _mm256_storeu_si256((__m256i*)ptr, x); }
-        simd4i low() const { return _mm256_castsi256_si128(x); }
+        inline simd4i low() const { return _mm256_castsi256_si128(x); }
-        simd4i high() const { return _mm256_extractf128_si256(x,1); }
+        inline simd4i high() const { return _mm256_extractf128_si256(x,1); }
-        unsigned int size() const { return 4; }
+        inline unsigned int size() const { return 4; }
-        int32 operator[](unsigned int idx) const 
+        inline int32 operator[](unsigned int idx) const 
        {
            int32 temp[8];
            store(temp);
@@ -61,29 +61,29 @@ namespace dlib
    public:
        typedef int32 type;
-        simd8i() {}
+        inline simd8i() {}
-        simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){}
+        inline simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){}
-        simd8i(int32 f) :_low(f),_high(f) {}
+        inline simd8i(int32 f) :_low(f),_high(f) {}
-        simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) :
+        inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) :
            _low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {}
        struct rawarray
        {
            simd4i low, high;
        };
-        simd8i(const rawarray& a) 
+        inline simd8i(const rawarray& a) 
        { 
            _low = a.low;
            _high = a.high;
        }
-        void load_aligned(const type* ptr)  { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
+        inline void load_aligned(const type* ptr)  { _low.load_aligned(ptr); _high.load_aligned(ptr+4); }
-        void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
+        inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); }
-        void load(const type* ptr)          { _low.load(ptr); _high.load(ptr+4); }
+        inline void load(const type* ptr)          { _low.load(ptr); _high.load(ptr+4); }
-        void store(type* ptr)         const { _low.store(ptr); _high.store(ptr+4); }
+        inline void store(type* ptr)         const { _low.store(ptr); _high.store(ptr+4); }
-        unsigned int size() const { return 8; }
+        inline unsigned int size() const { return 8; }
-        int32 operator[](unsigned int idx) const 
+        inline int32 operator[](unsigned int idx) const 
        {
            if (idx < 4)
                return _low[idx];
@@ -91,8 +91,8 @@ namespace dlib
                return _high[idx-4];
        }
-        simd4i low() const { return _low; }
+        inline simd4i low() const { return _low; }
-        simd4i high() const { return _high; }
+        inline simd4i high() const { return _high; }
    private:
        simd4i _low, _high;