Commit 4b321057 authored by Marcin Tolysz, committed by Lucas Hosseini

Improve arm64 support. (#676)

* Improve Arm64 support.

Make library compile and run on Arm64 8.2 (Nvidia Xavier arm+cuda)
`configure` is regenerated

* remove simd from utils.cpp
parent b06cf412
# serial 2
#
# SYNOPSIS
#
#   AX_CPU_ARCH
#
# DESCRIPTION
#
#   Select architecture-specific compiler flags from the canonical host
#   triplet, i.e. the system the compiled code will run on.
#
#   Output variables (substituted into generated files):
#
#     ARCH_CPUFLAGS  "-msse4 -mpopcnt" on amd64/x86_64 hosts,
#                    "-march=armv8.2-a" on aarch64 hosts,
#                    left untouched on any other host.
#     ARCH_CXXFLAGS  "-m64" on amd64/x86_64 hosts,
#                    left untouched on any other host.
AC_DEFUN([AX_CPU_ARCH], [
dnl The flags apply to the code being built, so the host triplet is the one
dnl to inspect. The target triplet is only meant for packages that generate
dnl code themselves, and canonicalizing it also alters program_prefix when
dnl --target is given.
AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_MSG_CHECKING([for cpu arch])
case $host in
  amd64-* | x86_64-*)
    ARCH_CPUFLAGS="-msse4 -mpopcnt"
    ARCH_CXXFLAGS="-m64"
    ;;
  aarch64*-*)
    dnl armv8.2-a was chosen for the Nvidia Xavier but is applied to every
    dnl aarch64 host. Proper per-CPU detection would be nice.
    ARCH_CPUFLAGS="-march=armv8.2-a"
    ;;
  *) ;;
esac
AC_MSG_RESULT([$host CPUFLAGS+=$ARCH_CPUFLAGS CXXFLAGS+=$ARCH_CXXFLAGS])
AC_SUBST(ARCH_CPUFLAGS)
AC_SUBST(ARCH_CXXFLAGS)
])dnl
...@@ -626,6 +626,12 @@ ac_includes_default="\ ...@@ -626,6 +626,12 @@ ac_includes_default="\
ac_header_list= ac_header_list=
ac_subst_vars='LTLIBOBJS ac_subst_vars='LTLIBOBJS
ARCH_CXXFLAGS
ARCH_CPUFLAGS
target_os
target_vendor
target_cpu
target
LAPACK_LIBS LAPACK_LIBS
OPENMP_LDFLAGS OPENMP_LDFLAGS
BLAS_LIBS BLAS_LIBS
...@@ -686,6 +692,7 @@ infodir ...@@ -686,6 +692,7 @@ infodir
docdir docdir
oldincludedir oldincludedir
includedir includedir
runstatedir
localstatedir localstatedir
sharedstatedir sharedstatedir
sysconfdir sysconfdir
...@@ -767,6 +774,7 @@ datadir='${datarootdir}' ...@@ -767,6 +774,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc' sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com' sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var' localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include' includedir='${prefix}/include'
oldincludedir='/usr/include' oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
...@@ -1019,6 +1027,15 @@ do ...@@ -1019,6 +1027,15 @@ do
| -silent | --silent | --silen | --sile | --sil) | -silent | --silent | --silen | --sile | --sil)
silent=yes ;; silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;; ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
...@@ -1156,7 +1173,7 @@ fi ...@@ -1156,7 +1173,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \ datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir libdir localedir mandir runstatedir
do do
eval ac_val=\$$ac_var eval ac_val=\$$ac_var
# Remove trailing slashes. # Remove trailing slashes.
...@@ -1309,6 +1326,7 @@ Fine tuning of the installation directories: ...@@ -1309,6 +1326,7 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var] --localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib] --libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include] --includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include] --oldincludedir=DIR C header files for non-gcc [/usr/include]
...@@ -1329,6 +1347,7 @@ _ACEOF ...@@ -1329,6 +1347,7 @@ _ACEOF
System types: System types:
--build=BUILD configure for building on BUILD [guessed] --build=BUILD configure for building on BUILD [guessed]
--host=HOST cross-compile to build programs to run on HOST [BUILD] --host=HOST cross-compile to build programs to run on HOST [BUILD]
--target=TARGET configure for building compilers for TARGET [HOST]
_ACEOF _ACEOF
fi fi
...@@ -5455,6 +5474,8 @@ main () ...@@ -5455,6 +5474,8 @@ main ()
if (*(data + i) != *(data3 + i)) if (*(data + i) != *(data3 + i))
return 14; return 14;
close (fd); close (fd);
free (data);
free (data3);
return 0; return 0;
} }
_ACEOF _ACEOF
...@@ -6814,6 +6835,68 @@ if test "x$ax_lapack_ok" == "xno"; then ...@@ -6814,6 +6835,68 @@ if test "x$ax_lapack_ok" == "xno"; then
as_fn_error $? "An implementation of LAPACK is required but none was found." "$LINENO" 5 as_fn_error $? "An implementation of LAPACK is required but none was found." "$LINENO" 5
fi fi
# Determine the canonical target triplet and split it into
# target_cpu / target_vendor / target_os.
# Announce the check on file descriptors 5 and 6.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5
$as_echo_n "checking target system type... " >&6; }
# If ac_cv_target is already set, reuse it and report "(cached)".
if ${ac_cv_target+:} false; then :
$as_echo_n "(cached) " >&6
else
# With no target alias, the target defaults to the host value; otherwise
# config.sub canonicalizes the alias and a failure aborts the script.
if test "x$target_alias" = x; then
ac_cv_target=$ac_cv_host
else
ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` ||
as_fn_error $? "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5
$as_echo "$ac_cv_target" >&6; }
# The value must contain at least two dashes; anything else is an error.
case $ac_cv_target in
*-*-*) ;;
*) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;;
esac
target=$ac_cv_target
# Split the triplet on '-' by temporarily overriding IFS: the first field is
# the cpu, the second the vendor, and everything remaining is the os.
ac_save_IFS=$IFS; IFS='-'
set x $ac_cv_target
shift
target_cpu=$1
target_vendor=$2
shift; shift
# Remember, the first character of IFS is used to create $*,
# except with old shells:
target_os=$*
IFS=$ac_save_IFS
# If the os fields were joined with spaces, turn the spaces back into dashes.
case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac
# The aliases save the names the user supplied, while $host etc.
# will get canonicalized.
# When a target alias was given and the program prefix, suffix and transform
# name still hold the values NONE, NONE and s,x,x, the program prefix becomes
# "<target_alias>-".
test -n "$target_alias" &&
test "$program_prefix$program_suffix$program_transform_name" = \
NONENONEs,x,x, &&
program_prefix=${target_alias}-
# Choose architecture-specific compiler flags from the canonical target
# triplet held in $target.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cpu arch" >&5
$as_echo_n "checking for cpu arch... " >&6; }
case $target in
# x86-64: SSE4 and POPCNT instruction flags, plus -m64 for the C++ compiler.
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-msse4 -mpopcnt"
ARCH_CXXFLAGS="-m64"
;;
# Any aarch64 triplet gets -march=armv8.2-a; ARCH_CXXFLAGS is not assigned.
aarch64*-*)
ARCH_CPUFLAGS="-march=armv8.2-a"
;;
# Every other triplet: neither variable is assigned here.
*) ;;
esac
# Report the triplet and the selected flags on file descriptors 5 and 6.
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $target CPUFLAGS+=$ARCH_CPUFLAGS CXXFLAGS+=$ARCH_CXXFLAGS" >&5
$as_echo "$target CPUFLAGS+=$ARCH_CPUFLAGS CXXFLAGS+=$ARCH_CXXFLAGS" >&6; }
ac_config_files="$ac_config_files makefile.inc" ac_config_files="$ac_config_files makefile.inc"
......
...@@ -64,6 +64,7 @@ if test "x$ax_lapack_ok" == "xno"; then ...@@ -64,6 +64,7 @@ if test "x$ax_lapack_ok" == "xno"; then
AC_MSG_ERROR([An implementation of LAPACK is required but none was found.]) AC_MSG_ERROR([An implementation of LAPACK is required but none was found.])
fi fi
AX_CPU_ARCH
AC_CONFIG_FILES([makefile.inc]) AC_CONFIG_FILES([makefile.inc])
AC_OUTPUT AC_OUTPUT
...@@ -9,8 +9,8 @@ CXXCPP = @CXXCPP@ ...@@ -9,8 +9,8 @@ CXXCPP = @CXXCPP@
# TODO: Investigate the LAPACKE wrapper for LAPACK, which defines the correct # TODO: Investigate the LAPACKE wrapper for LAPACK, which defines the correct
# type for FORTRAN integers. # type for FORTRAN integers.
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@ CPPFLAGS = -DFINTEGER=int @CPPFLAGS@
CXXFLAGS = -fPIC @OPENMP_CXXFLAGS@ -m64 -Wno-sign-compare @CXXFLAGS@ CXXFLAGS = -fPIC @OPENMP_CXXFLAGS@ @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = -msse4 -mpopcnt CPUFLAGS = @ARCH_CPUFLAGS@
LDFLAGS = @OPENMP_LDFLAGS@ @LDFLAGS@ LDFLAGS = @OPENMP_LDFLAGS@ @LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@ LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@
PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@ PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@
......
...@@ -21,8 +21,6 @@ ...@@ -21,8 +21,6 @@
#include <omp.h> #include <omp.h>
#include <immintrin.h>
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment