Commit 4b321057, authored by Marcin Tolysz and committed by Lucas Hosseini

Improve arm64 support. (#676)

* Improve Arm64 support.

Make the library compile and run on ARMv8.2-A (arm64) hardware such as the Nvidia Xavier (ARM + CUDA).
The `configure` script has been regenerated.

* remove simd from utils.cpp
parent b06cf412
# serial 1
dnl AX_CPU_ARCH
dnl -----------
dnl Select architecture-specific compiler flags from the canonical target
dnl triplet and make them available to the generated files.
dnl
dnl Output variables, both substituted into the configured files:
dnl   ARCH_CPUFLAGS  instruction-set flags for the detected CPU family
dnl   ARCH_CXXFLAGS  additional C++ flags for the detected CPU family
dnl
dnl Both variables are left empty when the target matches no branch below.
AC_DEFUN([AX_CPU_ARCH], [
dnl Declare the canonicalisation as a prerequisite so that the target
dnl variable is set, and its own "checking" line is printed, before ours.
AC_REQUIRE([AC_CANONICAL_TARGET])
AC_MSG_CHECKING([for cpu arch])
dnl Start from a known state: without this, a value inherited from the
dnl environment would leak into the substitutions for every target that
dnl does not assign the variable below.
ARCH_CPUFLAGS=
ARCH_CXXFLAGS=
case $target in
  amd64-* | x86_64-*)
    ARCH_CPUFLAGS="-msse4 -mpopcnt"
    ARCH_CXXFLAGS="-m64"
    ;;
  aarch64*-*)
    dnl These flags are tuned for the Nvidia Xavier. Proper detection of
    dnl the actual ARM architecture revision would be preferable.
    ARCH_CPUFLAGS="-march=armv8.2-a"
    ;;
  *) ;;
esac
AC_MSG_RESULT([$target CPUFLAGS+=$ARCH_CPUFLAGS CXXFLAGS+=$ARCH_CXXFLAGS])
AC_SUBST([ARCH_CPUFLAGS])
AC_SUBST([ARCH_CXXFLAGS])
])dnl
......@@ -626,6 +626,12 @@ ac_includes_default="\
ac_header_list=
ac_subst_vars='LTLIBOBJS
ARCH_CXXFLAGS
ARCH_CPUFLAGS
target_os
target_vendor
target_cpu
target
LAPACK_LIBS
OPENMP_LDFLAGS
BLAS_LIBS
......@@ -686,6 +692,7 @@ infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir
......@@ -767,6 +774,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
......@@ -1019,6 +1027,15 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
......@@ -1156,7 +1173,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir
libdir localedir mandir runstatedir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
......@@ -1309,6 +1326,7 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
......@@ -1329,6 +1347,7 @@ _ACEOF
System types:
--build=BUILD configure for building on BUILD [guessed]
--host=HOST cross-compile to build programs to run on HOST [BUILD]
--target=TARGET configure for building compilers for TARGET [HOST]
_ACEOF
fi
......@@ -5455,6 +5474,8 @@ main ()
if (*(data + i) != *(data3 + i))
return 14;
close (fd);
free (data);
free (data3);
return 0;
}
_ACEOF
......@@ -6814,6 +6835,68 @@ if test "x$ax_lapack_ok" == "xno"; then
as_fn_error $? "An implementation of LAPACK is required but none was found." "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5
$as_echo_n "checking target system type... " >&6; }
if ${ac_cv_target+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "x$target_alias" = x; then
ac_cv_target=$ac_cv_host
else
ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` ||
as_fn_error $? "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5
$as_echo "$ac_cv_target" >&6; }
case $ac_cv_target in
*-*-*) ;;
*) as_fn_error $? "invalid value of canonical target" "$LINENO" 5;;
esac
target=$ac_cv_target
ac_save_IFS=$IFS; IFS='-'
set x $ac_cv_target
shift
target_cpu=$1
target_vendor=$2
shift; shift
# Remember, the first character of IFS is used to create $*,
# except with old shells:
target_os=$*
IFS=$ac_save_IFS
case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac
# The aliases save the names the user supplied, while $host etc.
# will get canonicalized.
test -n "$target_alias" &&
test "$program_prefix$program_suffix$program_transform_name" = \
NONENONEs,x,x, &&
program_prefix=${target_alias}-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cpu arch" >&5
$as_echo_n "checking for cpu arch... " >&6; }
case $target in
amd64-* | x86_64-*)
ARCH_CPUFLAGS="-msse4 -mpopcnt"
ARCH_CXXFLAGS="-m64"
;;
aarch64*-*)
ARCH_CPUFLAGS="-march=armv8.2-a"
;;
*) ;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $target CPUFLAGS+=$ARCH_CPUFLAGS CXXFLAGS+=$ARCH_CXXFLAGS" >&5
$as_echo "$target CPUFLAGS+=$ARCH_CPUFLAGS CXXFLAGS+=$ARCH_CXXFLAGS" >&6; }
ac_config_files="$ac_config_files makefile.inc"
......
......@@ -64,6 +64,7 @@ if test "x$ax_lapack_ok" == "xno"; then
AC_MSG_ERROR([An implementation of LAPACK is required but none was found.])
fi
AX_CPU_ARCH
AC_CONFIG_FILES([makefile.inc])
AC_OUTPUT
......@@ -9,8 +9,8 @@ CXXCPP = @CXXCPP@
# TODO: Investigate the LAPACKE wrapper for LAPACK, which defines the correct
# type for FORTRAN integers.
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@
CXXFLAGS = -fPIC @OPENMP_CXXFLAGS@ -m64 -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = -msse4 -mpopcnt
CXXFLAGS = -fPIC @OPENMP_CXXFLAGS@ @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = @ARCH_CPUFLAGS@
LDFLAGS = @OPENMP_LDFLAGS@ @LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@
PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@
......
......@@ -21,8 +21,6 @@
#include <omp.h>
#include <immintrin.h>
#include <algorithm>
#include <vector>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment