Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
D
dlib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
dlib
Commits
7e7943cd
Commit
7e7943cd
authored
Dec 01, 2013
by
Davis King
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Made all the simd functions explicitly inline because otherwise visual studio 2010
won't inline them.
parent
f2cc77aa
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
120 deletions
+120
-120
simd4f.h
dlib/simd/simd4f.h
+32
-32
simd4i.h
dlib/simd/simd4i.h
+22
-22
simd8f.h
dlib/simd/simd8f.h
+38
-38
simd8i.h
dlib/simd/simd8i.h
+28
-28
No files found.
dlib/simd/simd4f.h
View file @
7e7943cd
...
...
@@ -17,11 +17,11 @@ namespace dlib
public
:
typedef
float
type
;
simd4f
()
{}
simd4f
(
float
f
)
{
x
=
_mm_set1_ps
(
f
);
}
simd4f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
)
{
x
=
_mm_setr_ps
(
r0
,
r1
,
r2
,
r3
);
}
simd4f
(
const
__m128
&
val
)
:
x
(
val
)
{}
simd4f
(
const
simd4i
&
val
)
:
x
(
_mm_cvtepi32_ps
(
val
))
{}
inline
simd4f
()
{}
inline
simd4f
(
float
f
)
{
x
=
_mm_set1_ps
(
f
);
}
inline
simd4f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
)
{
x
=
_mm_setr_ps
(
r0
,
r1
,
r2
,
r3
);
}
inline
simd4f
(
const
__m128
&
val
)
:
x
(
val
)
{}
inline
simd4f
(
const
simd4i
&
val
)
:
x
(
_mm_cvtepi32_ps
(
val
))
{}
inline
simd4f
&
operator
=
(
const
simd4i
&
val
)
{
...
...
@@ -29,24 +29,24 @@ namespace dlib
return
*
this
;
}
simd4f
&
operator
=
(
const
__m128
&
val
)
inline
simd4f
&
operator
=
(
const
__m128
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m128
()
const
{
return
x
;
}
inline
operator
__m128
()
const
{
return
x
;
}
// truncate to 32bit integers
operator
__m128i
()
const
{
return
_mm_cvttps_epi32
(
x
);
}
inline
operator
__m128i
()
const
{
return
_mm_cvttps_epi32
(
x
);
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm_load_ps
(
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm_store_ps
(
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm_loadu_ps
(
ptr
);
}
void
store
(
type
*
ptr
)
const
{
_mm_storeu_ps
(
ptr
,
x
);
}
inline
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm_load_ps
(
ptr
);
}
inline
void
store_aligned
(
type
*
ptr
)
const
{
_mm_store_ps
(
ptr
,
x
);
}
inline
void
load
(
const
type
*
ptr
)
{
x
=
_mm_loadu_ps
(
ptr
);
}
inline
void
store
(
type
*
ptr
)
const
{
_mm_storeu_ps
(
ptr
,
x
);
}
unsigned
int
size
()
const
{
return
4
;
}
float
operator
[](
unsigned
int
idx
)
const
inline
unsigned
int
size
()
const
{
return
4
;
}
inline
float
operator
[](
unsigned
int
idx
)
const
{
float
temp
[
4
];
store
(
temp
);
...
...
@@ -62,16 +62,16 @@ namespace dlib
public
:
typedef
float
type
;
simd4f_bool
()
{}
simd4f_bool
(
const
__m128
&
val
)
:
x
(
val
)
{}
inline
simd4f_bool
()
{}
inline
simd4f_bool
(
const
__m128
&
val
)
:
x
(
val
)
{}
simd4f_bool
&
operator
=
(
const
__m128
&
val
)
inline
simd4f_bool
&
operator
=
(
const
__m128
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m128
()
const
{
return
x
;
}
inline
operator
__m128
()
const
{
return
x
;
}
private
:
...
...
@@ -83,13 +83,13 @@ namespace dlib
public
:
typedef
float
type
;
simd4f
()
{}
simd4f
(
float
f
)
{
x
[
0
]
=
f
;
x
[
1
]
=
f
;
x
[
2
]
=
f
;
x
[
3
]
=
f
;
}
simd4f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
)
{
x
[
0
]
=
r0
;
x
[
1
]
=
r1
;
x
[
2
]
=
r2
;
x
[
3
]
=
r3
;}
simd4f
(
const
simd4i
&
val
)
{
x
[
0
]
=
val
[
0
];
x
[
1
]
=
val
[
1
];
x
[
2
]
=
val
[
2
];
x
[
3
]
=
val
[
3
];}
inline
simd4f
()
{}
inline
simd4f
(
float
f
)
{
x
[
0
]
=
f
;
x
[
1
]
=
f
;
x
[
2
]
=
f
;
x
[
3
]
=
f
;
}
inline
simd4f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
)
{
x
[
0
]
=
r0
;
x
[
1
]
=
r1
;
x
[
2
]
=
r2
;
x
[
3
]
=
r3
;}
inline
simd4f
(
const
simd4i
&
val
)
{
x
[
0
]
=
val
[
0
];
x
[
1
]
=
val
[
1
];
x
[
2
]
=
val
[
2
];
x
[
3
]
=
val
[
3
];}
// truncate to 32bit integers
operator
simd4i
::
rawarray
()
const
inline
operator
simd4i
::
rawarray
()
const
{
simd4i
::
rawarray
temp
;
temp
.
a
[
0
]
=
(
int32
)
x
[
0
];
...
...
@@ -109,7 +109,7 @@ namespace dlib
}
void
load_aligned
(
const
type
*
ptr
)
inline
void
load_aligned
(
const
type
*
ptr
)
{
x
[
0
]
=
ptr
[
0
];
x
[
1
]
=
ptr
[
1
];
...
...
@@ -117,7 +117,7 @@ namespace dlib
x
[
3
]
=
ptr
[
3
];
}
void
store_aligned
(
type
*
ptr
)
const
inline
void
store_aligned
(
type
*
ptr
)
const
{
ptr
[
0
]
=
x
[
0
];
ptr
[
1
]
=
x
[
1
];
...
...
@@ -125,7 +125,7 @@ namespace dlib
ptr
[
3
]
=
x
[
3
];
}
void
load
(
const
type
*
ptr
)
inline
void
load
(
const
type
*
ptr
)
{
x
[
0
]
=
ptr
[
0
];
x
[
1
]
=
ptr
[
1
];
...
...
@@ -133,7 +133,7 @@ namespace dlib
x
[
3
]
=
ptr
[
3
];
}
void
store
(
type
*
ptr
)
const
inline
void
store
(
type
*
ptr
)
const
{
ptr
[
0
]
=
x
[
0
];
ptr
[
1
]
=
x
[
1
];
...
...
@@ -141,8 +141,8 @@ namespace dlib
ptr
[
3
]
=
x
[
3
];
}
unsigned
int
size
()
const
{
return
4
;
}
float
operator
[](
unsigned
int
idx
)
const
{
return
x
[
idx
];
}
inline
unsigned
int
size
()
const
{
return
4
;
}
inline
float
operator
[](
unsigned
int
idx
)
const
{
return
x
[
idx
];
}
private
:
float
x
[
4
];
...
...
@@ -153,10 +153,10 @@ namespace dlib
public
:
typedef
float
type
;
simd4f_bool
()
{}
simd4f_bool
(
bool
r0
,
bool
r1
,
bool
r2
,
bool
r3
)
{
x
[
0
]
=
r0
;
x
[
1
]
=
r1
;
x
[
2
]
=
r2
;
x
[
3
]
=
r3
;}
inline
simd4f_bool
()
{}
inline
simd4f_bool
(
bool
r0
,
bool
r1
,
bool
r2
,
bool
r3
)
{
x
[
0
]
=
r0
;
x
[
1
]
=
r1
;
x
[
2
]
=
r2
;
x
[
3
]
=
r3
;}
bool
operator
[](
unsigned
int
idx
)
const
{
return
x
[
idx
];
}
inline
bool
operator
[](
unsigned
int
idx
)
const
{
return
x
[
idx
];
}
private
:
bool
x
[
4
];
};
...
...
dlib/simd/simd4i.h
View file @
7e7943cd
...
...
@@ -15,26 +15,26 @@ namespace dlib
public
:
typedef
int32
type
;
simd4i
()
{}
simd4i
(
int32
f
)
{
x
=
_mm_set1_epi32
(
f
);
}
simd4i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
)
{
x
=
_mm_setr_epi32
(
r0
,
r1
,
r2
,
r3
);
}
simd4i
(
const
__m128i
&
val
)
:
x
(
val
)
{}
inline
simd4i
()
{}
inline
simd4i
(
int32
f
)
{
x
=
_mm_set1_epi32
(
f
);
}
inline
simd4i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
)
{
x
=
_mm_setr_epi32
(
r0
,
r1
,
r2
,
r3
);
}
inline
simd4i
(
const
__m128i
&
val
)
:
x
(
val
)
{}
simd4i
&
operator
=
(
const
__m128i
&
val
)
inline
simd4i
&
operator
=
(
const
__m128i
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m128i
()
const
{
return
x
;
}
inline
operator
__m128i
()
const
{
return
x
;
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm_load_si128
((
const
__m128i
*
)
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm_store_si128
((
__m128i
*
)
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm_loadu_si128
((
const
__m128i
*
)
ptr
);
}
void
store
(
type
*
ptr
)
const
{
_mm_storeu_si128
((
__m128i
*
)
ptr
,
x
);
}
inline
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm_load_si128
((
const
__m128i
*
)
ptr
);
}
inline
void
store_aligned
(
type
*
ptr
)
const
{
_mm_store_si128
((
__m128i
*
)
ptr
,
x
);
}
inline
void
load
(
const
type
*
ptr
)
{
x
=
_mm_loadu_si128
((
const
__m128i
*
)
ptr
);
}
inline
void
store
(
type
*
ptr
)
const
{
_mm_storeu_si128
((
__m128i
*
)
ptr
,
x
);
}
unsigned
int
size
()
const
{
return
4
;
}
int32
operator
[](
unsigned
int
idx
)
const
inline
unsigned
int
size
()
const
{
return
4
;
}
inline
int32
operator
[](
unsigned
int
idx
)
const
{
int32
temp
[
4
];
store
(
temp
);
...
...
@@ -51,17 +51,17 @@ namespace dlib
public
:
typedef
int32
type
;
simd4i
()
{}
simd4i
(
int32
f
)
{
x
[
0
]
=
f
;
x
[
1
]
=
f
;
x
[
2
]
=
f
;
x
[
3
]
=
f
;
}
simd4i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
)
{
x
[
0
]
=
r0
;
x
[
1
]
=
r1
;
x
[
2
]
=
r2
;
x
[
3
]
=
r3
;}
inline
simd4i
()
{}
inline
simd4i
(
int32
f
)
{
x
[
0
]
=
f
;
x
[
1
]
=
f
;
x
[
2
]
=
f
;
x
[
3
]
=
f
;
}
inline
simd4i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
)
{
x
[
0
]
=
r0
;
x
[
1
]
=
r1
;
x
[
2
]
=
r2
;
x
[
3
]
=
r3
;}
struct
rawarray
{
int32
a
[
4
];
};
simd4i
(
const
rawarray
&
a
)
{
x
[
0
]
=
a
.
a
[
0
];
x
[
1
]
=
a
.
a
[
1
];
x
[
2
]
=
a
.
a
[
2
];
x
[
3
]
=
a
.
a
[
3
];
}
inline
simd4i
(
const
rawarray
&
a
)
{
x
[
0
]
=
a
.
a
[
0
];
x
[
1
]
=
a
.
a
[
1
];
x
[
2
]
=
a
.
a
[
2
];
x
[
3
]
=
a
.
a
[
3
];
}
void
load_aligned
(
const
type
*
ptr
)
inline
void
load_aligned
(
const
type
*
ptr
)
{
x
[
0
]
=
ptr
[
0
];
x
[
1
]
=
ptr
[
1
];
...
...
@@ -69,7 +69,7 @@ namespace dlib
x
[
3
]
=
ptr
[
3
];
}
void
store_aligned
(
type
*
ptr
)
const
inline
void
store_aligned
(
type
*
ptr
)
const
{
ptr
[
0
]
=
x
[
0
];
ptr
[
1
]
=
x
[
1
];
...
...
@@ -77,7 +77,7 @@ namespace dlib
ptr
[
3
]
=
x
[
3
];
}
void
load
(
const
type
*
ptr
)
inline
void
load
(
const
type
*
ptr
)
{
x
[
0
]
=
ptr
[
0
];
x
[
1
]
=
ptr
[
1
];
...
...
@@ -85,7 +85,7 @@ namespace dlib
x
[
3
]
=
ptr
[
3
];
}
void
store
(
type
*
ptr
)
const
inline
void
store
(
type
*
ptr
)
const
{
ptr
[
0
]
=
x
[
0
];
ptr
[
1
]
=
x
[
1
];
...
...
@@ -93,8 +93,8 @@ namespace dlib
ptr
[
3
]
=
x
[
3
];
}
unsigned
int
size
()
const
{
return
4
;
}
int32
operator
[](
unsigned
int
idx
)
const
{
return
x
[
idx
];
}
inline
unsigned
int
size
()
const
{
return
4
;
}
inline
int32
operator
[](
unsigned
int
idx
)
const
{
return
x
[
idx
];
}
private
:
int32
x
[
4
];
...
...
dlib/simd/simd8f.h
View file @
7e7943cd
...
...
@@ -16,18 +16,18 @@ namespace dlib
public
:
typedef
float
type
;
simd8f
()
{}
simd8f
(
const
simd4f
&
low
,
const
simd4f
&
high
)
inline
simd8f
()
{}
inline
simd8f
(
const
simd4f
&
low
,
const
simd4f
&
high
)
{
x
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
low
),
high
,
1
);
}
simd8f
(
float
f
)
{
x
=
_mm256_set1_ps
(
f
);
}
inline
simd8f
(
float
f
)
{
x
=
_mm256_set1_ps
(
f
);
}
inline
simd8f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
,
float
r4
,
float
r5
,
float
r6
,
float
r7
)
{
x
=
_mm256_setr_ps
(
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
}
simd8f
(
const
simd8i
&
val
)
:
x
(
_mm256_cvtepi32_ps
(
val
))
{}
simd8f
(
const
__m256
&
val
)
:
x
(
val
)
{}
simd8f
&
operator
=
(
const
__m256
&
val
)
inline
simd8f
(
const
simd8i
&
val
)
:
x
(
_mm256_cvtepi32_ps
(
val
))
{}
inline
simd8f
(
const
__m256
&
val
)
:
x
(
val
)
{}
inline
simd8f
&
operator
=
(
const
__m256
&
val
)
{
x
=
val
;
return
*
this
;
...
...
@@ -35,23 +35,23 @@ namespace dlib
inline
operator
__m256
()
const
{
return
x
;
}
// truncate to 32bit integers
operator
__m256i
()
const
{
return
_mm256_cvttps_epi32
(
x
);
}
inline
operator
__m256i
()
const
{
return
_mm256_cvttps_epi32
(
x
);
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_ps
(
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_ps
(
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_ps
(
ptr
);
}
void
store
(
type
*
ptr
)
const
{
_mm256_storeu_ps
(
ptr
,
x
);
}
inline
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_ps
(
ptr
);
}
inline
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_ps
(
ptr
,
x
);
}
inline
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_ps
(
ptr
);
}
inline
void
store
(
type
*
ptr
)
const
{
_mm256_storeu_ps
(
ptr
,
x
);
}
unsigned
int
size
()
const
{
return
8
;
}
float
operator
[](
unsigned
int
idx
)
const
inline
unsigned
int
size
()
const
{
return
8
;
}
inline
float
operator
[](
unsigned
int
idx
)
const
{
float
temp
[
8
];
store
(
temp
);
return
temp
[
idx
];
}
simd4f
low
()
const
{
return
_mm256_castps256_ps128
(
x
);
}
simd4f
high
()
const
{
return
_mm256_extractf128_ps
(
x
,
1
);
}
inline
simd4f
low
()
const
{
return
_mm256_castps256_ps128
(
x
);
}
inline
simd4f
high
()
const
{
return
_mm256_extractf128_ps
(
x
,
1
);
}
private
:
__m256
x
;
...
...
@@ -63,20 +63,20 @@ namespace dlib
public
:
typedef
float
type
;
simd8f_bool
()
{}
simd8f_bool
(
const
__m256
&
val
)
:
x
(
val
)
{}
simd8f_bool
(
const
simd4f_bool
&
low
,
const
simd4f_bool
&
high
)
inline
simd8f_bool
()
{}
inline
simd8f_bool
(
const
__m256
&
val
)
:
x
(
val
)
{}
inline
simd8f_bool
(
const
simd4f_bool
&
low
,
const
simd4f_bool
&
high
)
{
x
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
low
),
high
,
1
);
}
simd8f_bool
&
operator
=
(
const
__m256
&
val
)
inline
simd8f_bool
&
operator
=
(
const
__m256
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m256
()
const
{
return
x
;
}
inline
operator
__m256
()
const
{
return
x
;
}
private
:
...
...
@@ -89,15 +89,15 @@ namespace dlib
public
:
typedef
float
type
;
simd8f
()
{}
simd8f
(
const
simd4f
&
low_
,
const
simd4f
&
high_
)
:
_low
(
low_
),
_high
(
high_
){}
simd8f
(
float
f
)
:
_low
(
f
),
_high
(
f
)
{}
simd8f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
,
float
r4
,
float
r5
,
float
r6
,
float
r7
)
:
inline
simd8f
()
{}
inline
simd8f
(
const
simd4f
&
low_
,
const
simd4f
&
high_
)
:
_low
(
low_
),
_high
(
high_
){}
inline
simd8f
(
float
f
)
:
_low
(
f
),
_high
(
f
)
{}
inline
simd8f
(
float
r0
,
float
r1
,
float
r2
,
float
r3
,
float
r4
,
float
r5
,
float
r6
,
float
r7
)
:
_low
(
r0
,
r1
,
r2
,
r3
),
_high
(
r4
,
r5
,
r6
,
r7
)
{}
simd8f
(
const
simd8i
&
val
)
:
_low
(
val
.
low
()),
_high
(
val
.
high
())
{
}
inline
simd8f
(
const
simd8i
&
val
)
:
_low
(
val
.
low
()),
_high
(
val
.
high
())
{
}
// truncate to 32bit integers
operator
simd8i
::
rawarray
()
const
inline
operator
simd8i
::
rawarray
()
const
{
simd8i
::
rawarray
temp
;
temp
.
low
=
simd4i
(
_low
);
...
...
@@ -105,13 +105,13 @@ namespace dlib
return
temp
;
}
void
load_aligned
(
const
type
*
ptr
)
{
_low
.
load_aligned
(
ptr
);
_high
.
load_aligned
(
ptr
+
4
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_low
.
store_aligned
(
ptr
);
_high
.
store_aligned
(
ptr
+
4
);
}
void
load
(
const
type
*
ptr
)
{
_low
.
load
(
ptr
);
_high
.
load
(
ptr
+
4
);
}
void
store
(
type
*
ptr
)
const
{
_low
.
store
(
ptr
);
_high
.
store
(
ptr
+
4
);
}
inline
void
load_aligned
(
const
type
*
ptr
)
{
_low
.
load_aligned
(
ptr
);
_high
.
load_aligned
(
ptr
+
4
);
}
inline
void
store_aligned
(
type
*
ptr
)
const
{
_low
.
store_aligned
(
ptr
);
_high
.
store_aligned
(
ptr
+
4
);
}
inline
void
load
(
const
type
*
ptr
)
{
_low
.
load
(
ptr
);
_high
.
load
(
ptr
+
4
);
}
inline
void
store
(
type
*
ptr
)
const
{
_low
.
store
(
ptr
);
_high
.
store
(
ptr
+
4
);
}
unsigned
int
size
()
const
{
return
8
;
}
float
operator
[](
unsigned
int
idx
)
const
inline
unsigned
int
size
()
const
{
return
8
;
}
inline
float
operator
[](
unsigned
int
idx
)
const
{
if
(
idx
<
4
)
return
_low
[
idx
];
...
...
@@ -119,8 +119,8 @@ namespace dlib
return
_high
[
idx
-
4
];
}
simd4f
low
()
const
{
return
_low
;
}
simd4f
high
()
const
{
return
_high
;
}
inline
simd4f
low
()
const
{
return
_low
;
}
inline
simd4f
high
()
const
{
return
_high
;
}
private
:
simd4f
_low
,
_high
;
...
...
@@ -131,12 +131,12 @@ namespace dlib
public
:
typedef
float
type
;
simd8f_bool
()
{}
simd8f_bool
(
const
simd4f_bool
&
low_
,
const
simd4f_bool
&
high_
)
:
_low
(
low_
),
_high
(
high_
){}
inline
simd8f_bool
()
{}
inline
simd8f_bool
(
const
simd4f_bool
&
low_
,
const
simd4f_bool
&
high_
)
:
_low
(
low_
),
_high
(
high_
){}
simd4f_bool
low
()
const
{
return
_low
;
}
simd4f_bool
high
()
const
{
return
_high
;
}
inline
simd4f_bool
low
()
const
{
return
_low
;
}
inline
simd4f_bool
high
()
const
{
return
_high
;
}
private
:
simd4f_bool
_low
,
_high
;
};
...
...
dlib/simd/simd8i.h
View file @
7e7943cd
...
...
@@ -15,37 +15,37 @@ namespace dlib
public
:
typedef
int32
type
;
simd8i
()
{}
simd8i
(
int32
f
)
{
x
=
_mm256_set1_epi32
(
f
);
}
simd8i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
,
inline
simd8i
()
{}
inline
simd8i
(
int32
f
)
{
x
=
_mm256_set1_epi32
(
f
);
}
inline
simd8i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
,
int32
r4
,
int32
r5
,
int32
r6
,
int32
r7
)
{
x
=
_mm256_setr_epi32
(
r0
,
r1
,
r2
,
r3
,
r4
,
r5
,
r6
,
r7
);
}
simd8i
(
const
__m256i
&
val
)
:
x
(
val
)
{}
inline
simd8i
(
const
__m256i
&
val
)
:
x
(
val
)
{}
simd8i
(
const
simd4i
&
low
,
const
simd4i
&
high
)
inline
simd8i
(
const
simd4i
&
low
,
const
simd4i
&
high
)
{
x
=
_mm256_insertf128_si256
(
_mm256_castsi128_si256
(
low
),
high
,
1
);
}
simd8i
&
operator
=
(
const
__m256i
&
val
)
inline
simd8i
&
operator
=
(
const
__m256i
&
val
)
{
x
=
val
;
return
*
this
;
}
operator
__m256i
()
const
{
return
x
;
}
inline
operator
__m256i
()
const
{
return
x
;
}
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_si256
((
const
__m256i
*
)
ptr
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_si256
((
__m256i
*
)
ptr
,
x
);
}
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_si256
((
const
__m256i
*
)
ptr
);
}
void
store
(
type
*
ptr
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
ptr
,
x
);
}
inline
void
load_aligned
(
const
type
*
ptr
)
{
x
=
_mm256_load_si256
((
const
__m256i
*
)
ptr
);
}
inline
void
store_aligned
(
type
*
ptr
)
const
{
_mm256_store_si256
((
__m256i
*
)
ptr
,
x
);
}
inline
void
load
(
const
type
*
ptr
)
{
x
=
_mm256_loadu_si256
((
const
__m256i
*
)
ptr
);
}
inline
void
store
(
type
*
ptr
)
const
{
_mm256_storeu_si256
((
__m256i
*
)
ptr
,
x
);
}
simd4i
low
()
const
{
return
_mm256_castsi256_si128
(
x
);
}
simd4i
high
()
const
{
return
_mm256_extractf128_si256
(
x
,
1
);
}
inline
simd4i
low
()
const
{
return
_mm256_castsi256_si128
(
x
);
}
inline
simd4i
high
()
const
{
return
_mm256_extractf128_si256
(
x
,
1
);
}
unsigned
int
size
()
const
{
return
4
;
}
int32
operator
[](
unsigned
int
idx
)
const
inline
unsigned
int
size
()
const
{
return
4
;
}
inline
int32
operator
[](
unsigned
int
idx
)
const
{
int32
temp
[
8
];
store
(
temp
);
...
...
@@ -61,29 +61,29 @@ namespace dlib
public
:
typedef
int32
type
;
simd8i
()
{}
simd8i
(
const
simd4i
&
low_
,
const
simd4i
&
high_
)
:
_low
(
low_
),
_high
(
high_
){}
simd8i
(
int32
f
)
:
_low
(
f
),
_high
(
f
)
{}
simd8i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
,
int32
r4
,
int32
r5
,
int32
r6
,
int32
r7
)
:
inline
simd8i
()
{}
inline
simd8i
(
const
simd4i
&
low_
,
const
simd4i
&
high_
)
:
_low
(
low_
),
_high
(
high_
){}
inline
simd8i
(
int32
f
)
:
_low
(
f
),
_high
(
f
)
{}
inline
simd8i
(
int32
r0
,
int32
r1
,
int32
r2
,
int32
r3
,
int32
r4
,
int32
r5
,
int32
r6
,
int32
r7
)
:
_low
(
r0
,
r1
,
r2
,
r3
),
_high
(
r4
,
r5
,
r6
,
r7
)
{}
struct
rawarray
{
simd4i
low
,
high
;
};
simd8i
(
const
rawarray
&
a
)
inline
simd8i
(
const
rawarray
&
a
)
{
_low
=
a
.
low
;
_high
=
a
.
high
;
}
void
load_aligned
(
const
type
*
ptr
)
{
_low
.
load_aligned
(
ptr
);
_high
.
load_aligned
(
ptr
+
4
);
}
void
store_aligned
(
type
*
ptr
)
const
{
_low
.
store_aligned
(
ptr
);
_high
.
store_aligned
(
ptr
+
4
);
}
void
load
(
const
type
*
ptr
)
{
_low
.
load
(
ptr
);
_high
.
load
(
ptr
+
4
);
}
void
store
(
type
*
ptr
)
const
{
_low
.
store
(
ptr
);
_high
.
store
(
ptr
+
4
);
}
inline
void
load_aligned
(
const
type
*
ptr
)
{
_low
.
load_aligned
(
ptr
);
_high
.
load_aligned
(
ptr
+
4
);
}
inline
void
store_aligned
(
type
*
ptr
)
const
{
_low
.
store_aligned
(
ptr
);
_high
.
store_aligned
(
ptr
+
4
);
}
inline
void
load
(
const
type
*
ptr
)
{
_low
.
load
(
ptr
);
_high
.
load
(
ptr
+
4
);
}
inline
void
store
(
type
*
ptr
)
const
{
_low
.
store
(
ptr
);
_high
.
store
(
ptr
+
4
);
}
unsigned
int
size
()
const
{
return
8
;
}
int32
operator
[](
unsigned
int
idx
)
const
inline
unsigned
int
size
()
const
{
return
8
;
}
inline
int32
operator
[](
unsigned
int
idx
)
const
{
if
(
idx
<
4
)
return
_low
[
idx
];
...
...
@@ -91,8 +91,8 @@ namespace dlib
return
_high
[
idx
-
4
];
}
simd4i
low
()
const
{
return
_low
;
}
simd4i
high
()
const
{
return
_high
;
}
inline
simd4i
low
()
const
{
return
_low
;
}
inline
simd4i
high
()
const
{
return
_high
;
}
private
:
simd4i
_low
,
_high
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment