Commit 2ed907aa authored by miscellanea's avatar miscellanea Committed by Davis E. King

Add arm files for libpng from official libpng v1.6.7 to support neon (#262) (#264)

Now dlib can be compiled for android with -DANDROID_ABI="armeabi-v7a with NEON" with https://github.com/taka-no-me/android-cmake
However, when compiling for arm64-v8a, since __ARM_NEON__ is not defined,
neon code will not be actually enabled, which requires upgrading libpng a bit more
parent e6614b50
......@@ -344,6 +344,15 @@ if (NOT TARGET dlib)
external/zlib/uncompr.c
external/zlib/zutil.c
)
if (NEON)
enable_language(ASM)
set(source_files ${source_files}
external/libpng/arm/arm_init.c
external/libpng/arm/filter_neon_intrinsics.c
external/libpng/arm/filter_neon.S
)
set_source_files_properties(external/libpng/arm/filter_neon.S PROPERTIES COMPILE_FLAGS "${CMAKE_ASM_FLAGS} ${CMAKE_CXX_FLAGS} -x assembler-with-cpp")
endif()
set(REQUIRES_LIBS "")
endif()
set(source_files ${source_files}
......
/* arm_init.c - NEON optimised filter functions
*
* Copyright (c) 2013 Glenn Randers-Pehrson
* Written by Mans Rullgard, 2011.
* Last changed in libpng 1.6.6 [September 16, 2013]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
* called.
*/
#define _POSIX_SOURCE 1
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_ARM_NEON_OPT > 0
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */
#include <signal.h> /* for sig_atomic_t */
#ifdef __ANDROID__
/* Linux provides access to information about CPU capabilites via
* /proc/self/auxv, however Android blocks this while still claiming to be
* Linux. The Andoid NDK, however, provides appropriate support.
*
* Documentation: http://www.kandroid.org/ndk/docs/CPU-ARM-NEON.html
*/
#include <cpu-features.h>
static int
png_have_neon(png_structp png_ptr)
{
/* This is a whole lot easier than the mess below, however it is probably
* implemented as below, therefore it is better to cache the result (these
* function calls may be slow!)
*/
PNG_UNUSED(png_ptr)
return android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM &&
(android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
}
#elif defined(__linux__)
/* The generic __linux__ implementation requires reading /proc/self/auxv and
* looking at each element for one that records NEON capabilities.
*/
#include <unistd.h> /* for POSIX 1003.1 */
#include <errno.h> /* for EINTR */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <elf.h>
#include <asm/hwcap.h>
/* A read call may be interrupted, in which case it returns -1 and sets errno to
* EINTR if nothing was done, otherwise (if something was done) a partial read
* may result.
*/
static size_t
safe_read(png_structp png_ptr, int fd, void *buffer_in, size_t nbytes)
{
size_t ntotal = 0;
char *buffer = png_voidcast(char*, buffer_in);
while (nbytes > 0)
{
unsigned int nread;
int iread;
/* Passing nread > INT_MAX to read is implementation defined in POSIX
* 1003.1, therefore despite the unsigned argument portable code must
* limit the value to INT_MAX!
*/
if (nbytes > INT_MAX)
nread = INT_MAX;
else
nread = (unsigned int)/*SAFE*/nbytes;
iread = read(fd, buffer, nread);
if (iread == -1)
{
/* This is the devil in the details, a read can terminate early with 0
* bytes read because of EINTR, yet it still returns -1 otherwise end
* of file cannot be distinguished.
*/
if (errno != EINTR)
{
png_warning(png_ptr, "/proc read failed");
return 0; /* I.e. a permanent failure */
}
}
else if (iread < 0)
{
/* Not a valid 'read' result: */
png_warning(png_ptr, "OS /proc read bug");
return 0;
}
else if (iread > 0)
{
/* Continue reading until a permanent failure, or EOF */
buffer += iread;
nbytes -= (unsigned int)/*SAFE*/iread;
ntotal += (unsigned int)/*SAFE*/iread;
}
else
return ntotal;
}
return ntotal; /* nbytes == 0 */
}
static int
png_have_neon(png_structp png_ptr)
{
int fd = open("/proc/self/auxv", O_RDONLY);
Elf32_auxv_t aux;
/* Failsafe: failure to open means no NEON */
if (fd == -1)
{
png_warning(png_ptr, "/proc/self/auxv open failed");
return 0;
}
while (safe_read(png_ptr, fd, &aux, sizeof aux) == sizeof aux)
{
if (aux.a_type == AT_HWCAP && (aux.a_un.a_val & HWCAP_NEON) != 0)
{
close(fd);
return 1;
}
}
close(fd);
return 0;
}
#else
/* We don't know how to do a run-time check on this system */
# error "no support for run-time ARM NEON checks"
#endif /* OS checks */
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif
void
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
{
/* The switch statement is compiled in for ARM_NEON_API, the call to
* png_have_neon is compiled in for ARM_NEON_CHECK. If both are defined
* the check is only performed if the API has not set the NEON option on
* or off explicitly. In this case the check controls what happens.
*
* If the CHECK is not compiled in and the option is UNSET the behavior prior
* to 1.6.7 was to use the NEON code - this was a bug caused by having the
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
* as documented in png.h
*/
#ifdef PNG_ARM_NEON_API_SUPPORTED
switch ((pp->options >> PNG_ARM_NEON) & 3)
{
case PNG_OPTION_UNSET:
/* Allow the run-time check to execute if it has been enabled -
* thus both API and CHECK can be turned on. If it isn't supported
* this case will fall through to the 'default' below, which just
* returns.
*/
#endif /* PNG_ARM_NEON_API_SUPPORTED */
#ifdef PNG_ARM_NEON_CHECK_SUPPORTED
{
static volatile sig_atomic_t no_neon = -1; /* not checked */
if (no_neon < 0)
no_neon = !png_have_neon(pp);
if (no_neon)
return;
}
#ifdef PNG_ARM_NEON_API_SUPPORTED
break;
#endif
#endif /* PNG_ARM_NEON_CHECK_SUPPORTED */
#ifdef PNG_ARM_NEON_API_SUPPORTED
default: /* OFF or INVALID */
return;
case PNG_OPTION_ON:
/* Option turned on */
break;
}
#endif
/* IMPORTANT: any new external functions used here must be declared using
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
* 'prefix' option to configure works:
*
* ./configure --with-libpng-prefix=foobar_
*
* Verify you have got this right by running the above command, doing a build
* and examining pngprefix.h; it must contain a #define for every external
* function you add. (Notice that this happens automatically for the
* initialization function.)
*/
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth3_neon;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth4_neon;
}
}
#endif /* PNG_ARM_NEON_OPT > 0 */
#endif /* PNG_READ_SUPPORTED */
/* filter_neon.S - NEON optimised filter functions
*
* Copyright (c) 2013 Glenn Randers-Pehrson
* Written by Mans Rullgard, 2011.
* Last changed in libpng 1.6.7 [November 14, 2013]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
/* This is required to get the symbol renames, which are #defines, and also
* includes the definition (or not) of PNG_ARM_NEON_OPT.
*/
#define PNG_VERSION_INFO_ONLY
#include "../pngpriv.h"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
#endif
/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
* ARM64). The code in arm/filter_neon_intrinsics.c supports ARM64, however it
* only works if -mfpu=neon is specified on the GCC command line. See pngpriv.h
* for the logic which sets PNG_USE_ARM_NEON_ASM:
*/
#if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
#ifdef PNG_READ_SUPPORTED
#if PNG_ARM_NEON_OPT > 0
#ifdef __ELF__
# define ELF
#else
# define ELF @
#endif
.arch armv7-a
.fpu neon
.macro func name, export=0
.macro endfunc
ELF .size \name, . - \name
.endfunc
.purgem endfunc
.endm
.text
.if \export
.global \name
.endif
ELF .type \name, STT_FUNC
.func \name
\name:
.endm
func png_read_filter_row_sub4_neon, export=1
ldr r3, [r0, #4] @ rowbytes
vmov.i8 d3, #0
1:
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
vadd.u8 d0, d3, d4
vadd.u8 d1, d0, d5
vadd.u8 d2, d1, d6
vadd.u8 d3, d2, d7
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
subs r3, r3, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_sub3_neon, export=1
ldr r3, [r0, #4] @ rowbytes
vmov.i8 d3, #0
mov r0, r1
mov r2, #3
mov r12, #12
vld1.8 {q11}, [r0], r12
1:
vext.8 d5, d22, d23, #3
vadd.u8 d0, d3, d22
vext.8 d6, d22, d23, #6
vadd.u8 d1, d0, d5
vext.8 d7, d23, d23, #1
vld1.8 {q11}, [r0], r12
vst1.32 {d0[0]}, [r1,:32], r2
vadd.u8 d2, d1, d6
vst1.32 {d1[0]}, [r1], r2
vadd.u8 d3, d2, d7
vst1.32 {d2[0]}, [r1], r2
vst1.32 {d3[0]}, [r1], r2
subs r3, r3, #12
bgt 1b
bx lr
endfunc
func png_read_filter_row_up_neon, export=1
ldr r3, [r0, #4] @ rowbytes
1:
vld1.8 {q0}, [r1,:128]
vld1.8 {q1}, [r2,:128]!
vadd.u8 q0, q0, q1
vst1.8 {q0}, [r1,:128]!
subs r3, r3, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_avg4_neon, export=1
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
1:
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]!
vhadd.u8 d0, d3, d16
vadd.u8 d0, d0, d4
vhadd.u8 d1, d0, d17
vadd.u8 d1, d1, d5
vhadd.u8 d2, d1, d18
vadd.u8 d2, d2, d6
vhadd.u8 d3, d2, d19
vadd.u8 d3, d3, d7
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
subs r12, r12, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_avg3_neon, export=1
push {r4,lr}
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
mov r0, r1
mov r4, #3
mov lr, #12
vld1.8 {q11}, [r0], lr
1:
vld1.8 {q10}, [r2], lr
vext.8 d5, d22, d23, #3
vhadd.u8 d0, d3, d20
vext.8 d17, d20, d21, #3
vadd.u8 d0, d0, d22
vext.8 d6, d22, d23, #6
vhadd.u8 d1, d0, d17
vext.8 d18, d20, d21, #6
vadd.u8 d1, d1, d5
vext.8 d7, d23, d23, #1
vld1.8 {q11}, [r0], lr
vst1.32 {d0[0]}, [r1,:32], r4
vhadd.u8 d2, d1, d18
vst1.32 {d1[0]}, [r1], r4
vext.8 d19, d21, d21, #1
vadd.u8 d2, d2, d6
vhadd.u8 d3, d2, d19
vst1.32 {d2[0]}, [r1], r4
vadd.u8 d3, d3, d7
vst1.32 {d3[0]}, [r1], r4
subs r12, r12, #12
bgt 1b
pop {r4,pc}
endfunc
.macro paeth rx, ra, rb, rc
vaddl.u8 q12, \ra, \rb @ a + b
vaddl.u8 q15, \rc, \rc @ 2*c
vabdl.u8 q13, \rb, \rc @ pa
vabdl.u8 q14, \ra, \rc @ pb
vabd.u16 q15, q12, q15 @ pc
vcle.u16 q12, q13, q14 @ pa <= pb
vcle.u16 q13, q13, q15 @ pa <= pc
vcle.u16 q14, q14, q15 @ pb <= pc
vand q12, q12, q13 @ pa <= pb && pa <= pc
vmovn.u16 d28, q14
vmovn.u16 \rx, q12
vbsl d28, \rb, \rc
vbsl \rx, \ra, d28
.endm
func png_read_filter_row_paeth4_neon, export=1
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
vmov.i8 d20, #0
1:
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]!
paeth d0, d3, d16, d20
vadd.u8 d0, d0, d4
paeth d1, d0, d17, d16
vadd.u8 d1, d1, d5
paeth d2, d1, d18, d17
vadd.u8 d2, d2, d6
paeth d3, d2, d19, d18
vmov d20, d19
vadd.u8 d3, d3, d7
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
subs r12, r12, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_paeth3_neon, export=1
push {r4,lr}
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
vmov.i8 d4, #0
mov r0, r1
mov r4, #3
mov lr, #12
vld1.8 {q11}, [r0], lr
1:
vld1.8 {q10}, [r2], lr
paeth d0, d3, d20, d4
vext.8 d5, d22, d23, #3
vadd.u8 d0, d0, d22
vext.8 d17, d20, d21, #3
paeth d1, d0, d17, d20
vst1.32 {d0[0]}, [r1,:32], r4
vext.8 d6, d22, d23, #6
vadd.u8 d1, d1, d5
vext.8 d18, d20, d21, #6
paeth d2, d1, d18, d17
vext.8 d7, d23, d23, #1
vld1.8 {q11}, [r0], lr
vst1.32 {d1[0]}, [r1], r4
vadd.u8 d2, d2, d6
vext.8 d19, d21, d21, #1
paeth d3, d2, d19, d18
vst1.32 {d2[0]}, [r1], r4
vmov d4, d19
vadd.u8 d3, d3, d7
vst1.32 {d3[0]}, [r1], r4
subs r12, r12, #12
bgt 1b
pop {r4,pc}
endfunc
#endif /* PNG_ARM_NEON_OPT > 0 */
#endif /* PNG_READ_SUPPORTED */
#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment