Unverified commit 7f5b22b0 authored by Lucas Hosseini, committed by GitHub

Add conda packages metadata + tests. (#769)

+ Add conda packages metadata (now building Faiss using conda's toolchain);
+ add Dockerfile for building conda packages (for all CUDA versions);
+ add a working Dockerfile building Faiss on CentOS 7;
+ simplify the GPU build;
+ avoid falling back to the CPU-only version (Python);
+ simplify the Travis CI config;
+ update INSTALL.md;
+ add a configure flag for specifying target architectures (--with-cuda-arch; see the sketch below);
+ fix the Makefile for GPU tests;
+ fix various Makefile issues;
+ remove a stale file (gpu/utils/DeviceUtils.cpp).
parent 6c1cb3cd
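For reference, a minimal sketch of the new configure flow (the CUDA prefix and gencode values here are illustrative; the full default list lives in the FA_CHECK_CUDA macro further down):

    aclocal && autoconf
    ./configure --with-cuda=/usr/local/cuda \
        --with-cuda-arch="-gencode=arch=compute_61,code=compute_61 -gencode=arch=compute_70,code=compute_70"
    make -j $(nproc)
    make -C python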
@@ -15,7 +15,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
compiler: gcc
addons:
@@ -25,7 +27,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
compiler: gcc
addons:
@@ -35,7 +39,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
compiler: clang
addons:
@@ -45,8 +51,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
# NOTE: Hack, c.f. https://github.com/travis-ci/travis-ci/issues/8613
- LD_LIBRARY_PATH="/usr/local/clang/lib"
- os: osx
@@ -69,8 +76,9 @@ install:
- ./.travis/install.sh
- aclocal
- autoconf
- ./configure
- ./configure --without-cuda
- make
- make -C python
script:
- make test
@@ -7,7 +7,7 @@ function installswig() {
cd /tmp/ &&
wget https://github.com/swig/swig/archive/rel-3.0.12.tar.gz &&
tar zxf rel-3.0.12.tar.gz && cd swig-rel-3.0.12 &&
./autogen.sh && ./configure --prefix "${HOME}"/swig/ 1>/dev/null &&
./autogen.sh && ./configure --prefix "${HOME}" 1>/dev/null &&
make >/dev/null &&
make install >/dev/null
}
FROM nvidia/cuda:8.0-devel-ubuntu16.04
MAINTAINER Pierre Letessier <pletessier@ina.fr>
FROM nvidia/cuda:8.0-devel-centos7
RUN apt-get update -y
RUN apt-get install -y libopenblas-dev python-numpy python-dev swig git python-pip wget
# Install MKL
RUN yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo
RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
RUN yum install -y intel-mkl-2019.3-062
ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
RUN pip install --upgrade pip
RUN pip install matplotlib
# Install necessary build tools
RUN yum install -y gcc-c++ make swig3
# Install necessary headers/libs
RUN yum install -y python-devel numpy
COPY . /opt/faiss
WORKDIR /opt/faiss
ENV BLASLDFLAGS /usr/lib/libopenblas.so.0
RUN mv example_makefiles/makefile.inc.Linux ./makefile.inc
RUN make tests/test_blas -j $(nproc) && \
make -j $(nproc) && \
make demos/demo_sift1M -j $(nproc) && \
make py
RUN cd gpu && \
make -j $(nproc) && \
make test/demo_ivfpq_indexing_gpu && \
make py
ENV PYTHONPATH $PYTHONPATH:/opt/faiss
# RUN ./tests/test_blas && \
# tests/demo_ivfpq_indexing
# RUN wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz && \
# tar xf sift.tar.gz && \
# mv sift sift1M
# RUN tests/demo_sift1M
# --with-cuda=/usr/local/cuda-8.0
RUN ./configure --without-cuda
RUN make -j $(nproc)
RUN make -C python
RUN make test
RUN make install
RUN make -C demos demo_ivfpq_indexing && \
LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ./demos/demo_ivfpq_indexing
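A minimal sketch of exercising this image locally (the image tag is arbitrary, and this assumes the Dockerfile above sits at the repository root):

    docker build -t faiss-centos7 .
    docker run --rm -it faiss-centos7 make test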
@@ -18,8 +18,8 @@
#include <sstream>
#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 4
#define FAISS_VERSION_PATCH 0
#define FAISS_VERSION_MINOR 5
#define FAISS_VERSION_PATCH 1
/**
* @namespace faiss
@@ -6,19 +6,35 @@
-include makefile.inc
SRC=$(wildcard *.cpp)
OBJ=$(SRC:.cpp=.o)
HEADERS = $(wildcard *.h)
SRC = $(wildcard *.cpp)
OBJ = $(SRC:.cpp=.o)
INSTALLDIRS = $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss
GPU_HEADERS = $(wildcard gpu/*.h gpu/impl/*.h gpu/utils/*.h)
GPU_CPPSRC = $(wildcard gpu/*.cpp gpu/impl/*.cpp gpu/utils/*.cpp)
GPU_CUSRC = $(wildcard gpu/*.cu gpu/impl/*.cu gpu/utils/*.cu \
gpu/utils/nvidia/*.cu gpu/utils/blockselect/*.cu gpu/utils/warpselect/*.cu)
GPU_SRC = $(GPU_CPPSRC) $(GPU_CUSRC)
GPU_CPPOBJ = $(GPU_CPPSRC:.cpp=.o)
GPU_CUOBJ = $(GPU_CUSRC:.cu=.o)
GPU_OBJ = $(GPU_CPPOBJ) $(GPU_CUOBJ)
GPU_INSTALLDIRS = $(DESTDIR)$(includedir)/faiss/gpu/{impl,utils}
ifneq ($(strip $(NVCC)),)
OBJ += $(GPU_OBJ)
INSTALLDIRS += $(GPU_INSTALLDIRS)
HEADERS += $(GPU_HEADERS)
endif
############################
# Building
default: libfaiss.a
all: libfaiss.a libfaiss.$(SHAREDEXT)
libfaiss.a: $(OBJ)
ar r $@ $^
$(AR) r $@ $^
libfaiss.$(SHAREDEXT): $(OBJ)
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
@@ -26,8 +42,11 @@ libfaiss.$(SHAREDEXT): $(OBJ)
%.o: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c $< -o $@
%.o: %.cu
$(NVCC) $(NVCCFLAGS) -g -O3 -c $< -o $@
clean:
rm -f libfaiss.*
rm -f libfaiss.{a,$(SHAREDEXT)}
rm -f $(OBJ)
@@ -35,15 +54,14 @@ clean:
# Installing
install: libfaiss.a libfaiss.$(SHAREDEXT) installdirs
cp libfaiss.a libfaiss.$(SHAREDEXT) $(DESTDIR)$(libdir)
cp *.h $(DESTDIR)$(includedir)/faiss/
cp libfaiss.{a,$(SHAREDEXT)} $(DESTDIR)$(libdir)
tar cf - $(HEADERS) | tar xf - -C $(DESTDIR)$(includedir)/faiss/
installdirs:
$(MKDIR_P) $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss
$(MKDIR_P) $(INSTALLDIRS)
uninstall:
rm $(DESTDIR)$(libdir)/libfaiss.a
rm $(DESTDIR)$(libdir)/libfaiss.$(SHAREDEXT)
rm -f $(DESTDIR)$(libdir)/libfaiss.{a,$(SHAREDEXT)}
rm -rf $(DESTDIR)$(includedir)/faiss
@@ -52,41 +70,44 @@ uninstall:
-include depend
# The above makefile.dep is generated by the following target:
depend:
for i in $(SRC); do \
$(CXXCPP) $(CPPFLAGS) -MM $$i; \
depend: $(SRC) $(GPU_SRC)
for i in $^; do \
$(CXXCPP) $(CPPFLAGS) -x c++ -MM $$i; \
done > depend
#############################
# Python
py: libfaiss.a
$(MAKE) -C python
#############################
# Tests
test: libfaiss.a py
make -C tests run
$(MAKE) -C tests run
PYTHONPATH=./python/build/`ls python/build | grep lib` \
$(PYTHON) -m unittest discover tests/ -v
test_gpu: libfaiss.a
$(MAKE) -C gpu/test run
PYTHONPATH=./python/build/`ls python/build | grep lib` \
$(PYTHON) -m unittest discover gpu/test/ -v
#############################
# Demos
demos: libfaiss.a
make -C demos
$(MAKE) -C demos
#############################
# Misc
misc/test_blas: misc/test_blas.cpp
$(CXX) $(CXXFLAG) $(LDFLAGS) -o $@ $^ $(LIBS)
#############################
# Python
py:
$(MAKE) -C python build
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
.PHONY: all clean default demos install installdirs py test uninstall
.PHONY: all clean demos install installdirs py test gpu_test uninstall
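With these targets in place, a typical CPU-only check looks like the following sketch (make test_gpu additionally assumes an NVCC-enabled configure run and at least one visible CUDA device):

    ./configure --without-cuda
    make -j $(nproc)
    make -C python
    make test
    # with CUDA configured: make test_gpu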
AC_DEFUN([FA_CHECK_CUDA], [
AC_ARG_WITH(cuda,
[AS_HELP_STRING([--with-cuda=<prefix>], [prefix of the CUDA installation])])
case $with_cuda in
"") cuda_prefix=/usr/local/cuda ;;
*) cuda_prefix="$with_cuda"
esac
[AS_HELP_STRING([--with-cuda=<prefix>], [prefix of the CUDA installation])])
AC_ARG_WITH(cuda-arch,
[AS_HELP_STRING([--with-cuda-arch=<gencodes>], [device specific -gencode flags])],
[],
[with_cuda_arch=default])
AC_CHECK_PROG(NVCC, "nvcc", "$cuda_prefix/bin/nvcc", "", "$cuda_prefix/bin")
fa_nvcc_bin=$NVCC
if test x$with_cuda != xno; then
if test x$with_cuda != x; then
cuda_prefix=$with_cuda
AC_CHECK_PROG(NVCC, [nvcc], [$cuda_prefix/bin/nvcc], [], [$cuda_prefix/bin])
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
AC_CHECK_PROGS(NVCC, [nvcc /usr/local/cuda/bin/nvcc], [])
if test "x$NVCC" == "x/usr/local/cuda/bin/nvcc"; then
cuda_prefix="/usr/local/cuda"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
cuda_prefix=""
NVCC_CPPFLAGS=""
NVCC_LDFLAGS=""
fi
fi
if test "x$NVCC" == x; then
AC_MSG_ERROR([Couldn't find nvcc])
fi
if test "x$with_cuda_arch" == xdefault; then
with_cuda_arch="-gencode=arch=compute_35,code=compute_35 \\
-gencode=arch=compute_52,code=compute_52 \\
-gencode=arch=compute_60,code=compute_60 \\
-gencode=arch=compute_61,code=compute_61 \\
-gencode=arch=compute_70,code=compute_70 \\
-gencode=arch=compute_75,code=compute_75"
fi
if test x$fa_nvcc_bin != x; then
fa_save_CPPFLAGS="$CPPFLAGS"
fa_save_LDFLAGS="$LDFLAGS"
fa_save_LIBS="$LIBS"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
CPPFLAGS="$NVCC_CPPFLAGS $CPPFLAGS"
LDFLAGS="$NVCC_LDFLAGS $LDFLAGS"
AC_CHECK_HEADER([cuda.h], [], AC_MSG_FAILURE([Couldn't find cuda.h]))
AC_CHECK_LIB([cuda], [cuInit], [], AC_MSG_FAILURE([Couldn't find libcuda]))
AC_CHECK_LIB([cublas], [cublasAlloc], [], AC_MSG_FAILURE([Couldn't find libcublas]))
AC_CHECK_LIB([cudart], [cudaSetDevice], [], AC_MSG_FAILURE([Couldn't find libcudart]))
@@ -32,8 +56,6 @@ if test x$fa_nvcc_bin != x; then
CPPFLAGS="$fa_save_CPPFLAGS"
LDFLAGS="$fa_save_LDFLAGS"
LIBS="$fa_save_LIBS"
else
cuda_prefix=""
fi
AC_SUBST(NVCC)
@@ -41,4 +63,5 @@ AC_SUBST(NVCC_CPPFLAGS)
AC_SUBST(NVCC_LDFLAGS)
AC_SUBST(NVCC_LIBS)
AC_SUBST(CUDA_PREFIX, $cuda_prefix)
AC_SUBST(CUDA_ARCH, $with_cuda_arch)
])
@@ -3,10 +3,9 @@ AC_DEFUN([FA_PROG_SWIG], [
AC_ARG_WITH(swig,
[AS_HELP_STRING([--with-swig=<bin>], [use SWIG binary <bin>])])
case $with_swig in
"") SWIG_BIN=swig ;;
*) SWIG_BIN="$with_swig"
"") AC_CHECK_PROG(SWIG, swig, swig);;
*) SWIG="$with_swig"
esac
AC_CHECK_PROG(SWIG, $SWIG_BIN, $SWIG_BIN)
AC_SUBST(SWIG)
])
@@ -10,36 +10,12 @@ esac
AC_CHECK_PROG(PYTHON, $PYTHON_BIN, $PYTHON_BIN)
fa_python_bin=$PYTHON
AC_ARG_WITH(python-config,
[AS_HELP_STRING([--with-python-config=<bin>], [use Python config binary <bin>])])
case $with_python_config in
"") PYTHON_CFG_BIN="${PYTHON_BIN}-config" ;;
*) PYTHON_CFG_BIN="$with_python_config"
esac
AC_CHECK_PROG(PYTHON_CFG, $PYTHON_CFG_BIN, $PYTHON_CFG_BIN)
fa_python_cfg_bin=$PYTHON_CFG
if test x$fa_python_cfg_bin != x; then
AC_MSG_CHECKING([for Python C flags])
fa_python_cflags=`${PYTHON_CFG} --includes`
if test x"$fa_python_cflags" == x; then
AC_MSG_RESULT([not found])
AC_MSG_WARN([You won't be able to build the python interface.])
else
AC_MSG_RESULT($fa_python_cflags)
AC_SUBST(PYTHON_CFLAGS, $fa_python_cflags)
fi
AC_MSG_CHECKING([for Python C flags])
fa_python_cflags=`$PYTHON -c "
import sysconfig
paths = [['-I' + sysconfig.get_path(p) for p in ['include', 'platinclude']]]
print(' '.join(paths))"`
AC_MSG_RESULT($fa_python_cflags)
AC_SUBST(PYTHON_CFLAGS, "$PYTHON_CFLAGS $fa_python_cflags")
AC_MSG_CHECKING([for Python ld flags])
fa_python_ldflags=`${PYTHON_CFG} --ldflags`
if test x"$fa_python_ldflags" == x; then
AC_MSG_RESULT([not found])
else
AC_MSG_RESULT($fa_python_ldflags)
AC_SUBST(PYTHON_LDFLAGS, $fa_python_ldflags)
fi
else
AC_MSG_WARN([You won't be able to build the python interface.])
fi
])
])dnl FA_PYTHON
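The sysconfig probe above is equivalent to running the following by hand (python stands for whichever interpreter configure selected):

    python -c "import sysconfig; print(' '.join('-I' + sysconfig.get_path(p) for p in ['include', 'platinclude']))"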
FROM soumith/conda-cuda:latest
COPY ./ faiss
WORKDIR /faiss/conda
ENV FAISS_BUILD_VERSION 1.5.1
ENV FAISS_BUILD_NUMBER 1
RUN conda build faiss --no-anaconda-upload -c pytorch
RUN CUDA_ROOT=/usr/local/cuda-8.0 \
CUDA_ARCH="-gencode=arch=compute_35,code=compute_35 \
-gencode=arch=compute_52,code=compute_52 \
-gencode=arch=compute_60,code=compute_60 \
-gencode=arch=compute_61,code=compute_61" \
conda build faiss-gpu --variants '{ "cudatoolkit": "8.0" }' \
--no-anaconda-upload -c pytorch --no-test
RUN CUDA_ROOT=/usr/local/cuda-9.0 \
CUDA_ARCH="-gencode=arch=compute_35,code=compute_35 \
-gencode=arch=compute_52,code=compute_52 \
-gencode=arch=compute_60,code=compute_60 \
-gencode=arch=compute_61,code=compute_61 \
-gencode=arch=compute_70,code=compute_70" \
conda build faiss-gpu --variants '{ "cudatoolkit": "9.0" }' \
--no-anaconda-upload -c pytorch --no-test
RUN CUDA_ROOT=/usr/local/cuda-10.0 \
CUDA_ARCH="-gencode=arch=compute_35,code=compute_35 \
-gencode=arch=compute_52,code=compute_52 \
-gencode=arch=compute_60,code=compute_60 \
-gencode=arch=compute_61,code=compute_61 \
-gencode=arch=compute_70,code=compute_70 \
-gencode=arch=compute_72,code=compute_72 \
-gencode=arch=compute_75,code=compute_75" \
conda build faiss-gpu --variants '{ "cudatoolkit": "10.0" }' \
--no-anaconda-upload -c pytorch --no-test
CONDA_BUILD_SYSROOT:
- /opt/MacOSX10.9.sdk # [osx]
python:
- 2.7
- 3.5
- 3.6
- 3.7
./configure --with-cuda=$CUDA_ROOT --with-cuda-arch="$CUDA_ARCH"
make -j $CPU_COUNT
cd python
make
$PYTHON setup.py install --single-version-externally-managed --record=record.txt
cxx_compiler_version:
- 5.4
cudatoolkit:
- 8.0
- 9.0
- 9.2
- 10.0
# - 10.1 # cudatoolkit 10.1 packages are not yet available on Anaconda.
pin_run_as_build:
cudatoolkit:
max_pin: x.x
package:
name: faiss-gpu
version: "{{ FAISS_BUILD_VERSION }}"
source:
git_url: ../../
requirements:
build:
- {{ compiler('cxx') }}
- llvm-openmp # [osx]
- setuptools
- swig
host:
- python {{ python }}
- intel-openmp # [osx]
- numpy 1.11.*
- mkl >=2018
- cudatoolkit {{ cudatoolkit }}
run:
- python {{ python }}
- intel-openmp # [osx]
- numpy >=1.11
- mkl >=2018
- blas=*=mkl
- {{ pin_compatible('cudatoolkit') }}
build:
number: {{ FAISS_BUILD_NUMBER }}
script_env:
- CUDA_ROOT
- CUDA_ARCH
about:
home: https://github.com/facebookresearch/faiss
license: BSD 3-Clause
license_family: BSD
license_file: LICENSE
summary: A library for efficient similarity search and clustering of dense vectors.
import faiss
import numpy as np
d = 128
n = 100
rs = np.random.RandomState(1337)
x = rs.rand(n, d).astype(np.float32)
index = faiss.IndexFlatL2(d)
res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
gpu_index.add(x)
D, I = gpu_index.search(x, 10)
./configure --without-cuda
make -j $CPU_COUNT
cd python
make
$PYTHON setup.py install --single-version-externally-managed --record=record.txt
package:
name: faiss-cpu
version: "{{ FAISS_BUILD_VERSION }}"
source:
git_url: ../../
requirements:
build:
- {{ compiler('cxx') }}
- llvm-openmp # [osx]
- setuptools
- swig
host:
- python {{ python }}
- intel-openmp # [osx]
- numpy 1.11.*
- mkl >=2018
run:
- python {{ python }}
- intel-openmp # [osx]
- numpy >=1.11
- blas=*=mkl
- mkl >=2018
build:
number: {{ FAISS_BUILD_NUMBER }}
about:
home: https://github.com/facebookresearch/faiss
license: BSD 3-Clause
license_family: BSD
license_file: LICENSE
summary: A library for efficient similarity search and clustering of dense vectors.
import faiss
import numpy as np
d = 128
# NOTE: BLAS kicks in only when n > distance_compute_blas_threshold = 20
n = 100
rs = np.random.RandomState(1337)
x = rs.rand(n, d).astype(np.float32)
index = faiss.IndexFlatL2(d)
index.add(x)
D, I = index.search(x, 10)
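Once the resulting packages are uploaded to the pytorch channel, installation should look roughly like this (the cudatoolkit pin picks one of the 8.0/9.0/10.0 variants built above):

    conda install faiss-cpu -c pytorch
    conda install faiss-gpu cudatoolkit=10.0 -c pytorch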
@@ -15,8 +15,8 @@ all: $(DEMOS)
clean:
rm -f $(DEMOS)
%: %.cpp ../libfaiss.a
$(CXX) -o $@ $(CXXFLAGS) $^ $(LDFLAGS) $(LIBS)
%: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) -lfaiss
.PHONY: all clean
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
-include ../makefile.inc
all: libgpufaiss.a libgpufaiss.$(SHAREDEXT)
CPPOBJ = GpuResources.o \
IndexProxy.o \
StandardGpuResources.o \
GpuAutoTune.o \
GpuClonerOptions.o \
impl/RemapIndices.o \
utils/DeviceMemory.o \
utils/StackDeviceMemory.o \
utils/DeviceUtils.o \
utils/Timer.o \
utils/MemorySpace.o \
utils/WorkerThread.o
INS = 1 32 64 128 256 F512 T512 F1024 T1024
CUOBJ = impl/BinaryDistance.o \
impl/BinaryFlatIndex.o \
impl/BroadcastSum.o \
impl/Distance.o \
impl/FlatIndex.o \
impl/InvertedListAppend.o \
impl/IVFBase.o \
impl/IVFFlat.o \
impl/IVFFlatScan.o \
impl/IVFPQ.o \
impl/IVFUtils.o \
impl/IVFUtilsSelect1.o \
impl/IVFUtilsSelect2.o \
impl/L2Norm.o \
impl/L2Select.o \
impl/PQCodeDistances.o \
impl/PQScanMultiPassNoPrecomputed.o \
impl/PQScanMultiPassPrecomputed.o \
impl/VectorResidual.o \
GpuDistance.o \
GpuIndex.o \
GpuIndexBinaryFlat.o \
GpuIndexFlat.o \
GpuIndexIVF.o \
GpuIndexIVFFlat.o \
GpuIndexIVFPQ.o \
utils/Float16.o \
utils/MatrixMult.o \
utils/BlockSelectFloat.o \
utils/BlockSelectHalf.o \
utils/WarpSelectFloat.o \
utils/WarpSelectHalf.o \
utils/nvidia/fp16_emu.o \
$(foreach v,$(INS), \
utils/blockselect/BlockSelectHalf$(v).o \
utils/blockselect/BlockSelectFloat$(v).o \
utils/warpselect/WarpSelectHalf$(v).o \
utils/warpselect/WarpSelectFloat$(v).o \
)
%.o: %.cpp
$(CXX) $(CXXFLAGS) $(CPUFLAGS) -c $< -o $@ $(CUDACFLAGS)
%.o: %.cu
$(NVCC) $(NVCCFLAGS) -g -O3 -c $< -o $@
libgpufaiss.a: $(CPPOBJ) $(CUOBJ)
ar r $@ $^
libgpufaiss.$(SHAREDEXT): $(CPPOBJ) $(CUOBJ)
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) $(NVCCLDFLAGS) \
-o libgpufaiss.$(SHAREDEXT) $^ $(LIBS) $(NVCCLIBS)
clean:
rm -rf *.o impl/*.o utils/*.o libgpufaiss.a \
libgpufaiss.$(SHAREDEXT) \
-include depend
depend:
for i in $(patsubst %.o,%.cpp,$(CPPOBJ)) \
$(patsubst %.o,%.cu,$(CUOBJ)); do \
$(CXXCPP) $(CPPFLAGS) -x c++ -MM $$i; \
done > depend
install: libgpufaiss.a libgpufaiss.$(SHAREDEXT) installdirs
cp libgpufaiss.a libgpufaiss.$(SHAREDEXT) $(DESTDIR)$(libdir)
cp *.h $(DESTDIR)$(includedir)/faiss/gpu
cp --parents **/**.h $(DESTDIR)$(includedir)/faiss/gpu
installdirs:
$(MKDIR_P) $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss/gpu
.PHONY: all clean
@@ -6,13 +6,45 @@
-include ../../makefile.inc
%.o: %.cpp
$(CXX) $(CUDACFLAGS) -o $@ -c $^
TESTS_SRC = TestGpuIndexFlat.cpp TestGpuIndexIVFPQ.cpp \
TestGpuIndexBinaryFlat.cpp TestGpuIndexIVFFlat.cpp TestGpuMemoryException.cpp
CUDA_TESTS_SRC = TestGpuSelect.cu
demo_ivfpq_indexing_gpu: demo_ivfpq_indexing_gpu.o ../libgpufaiss.a ../../libfaiss.a
$(CXX) $(LDFLAGS) $(NVCCLDFLAGS) -o $@ $^ $(LIBS) $(NVCCLIBS)
TESTS_BIN = $(TESTS_SRC:.cpp=)
CUDA_TESTS_BIN = $(CUDA_TESTS_SRC:.cu=)
# test_gpu_index.py test_pytorch_faiss.py
run: $(TESTS_BIN) $(CUDA_TESTS_BIN)
for t in $(TESTS_BIN) $(CUDA_TESTS_BIN); do ./$$t; done
%.o: %.cu gtest
$(NVCC) $(NVCCFLAGS) -g -O3 -o $@ -c $< -Igtest/include
%.o: %.cpp gtest
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -o $@ -c $< -Igtest/include
%: %.o TestUtils.o ../../libfaiss.a gtest/make/gtest.a
$(CXX) -o $@ $^ $(LDFLAGS) $(LIBS)
demo_ivfpq_indexing_gpu: demo_ivfpq_indexing_gpu.o ../../libfaiss.a
$(CXX) -o $@ $^ $(LDFLAGS) $(LIBS)
demo_ivfpq_indexing_gpu.o: demo_ivfpq_indexing_gpu.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -o $@ -c $^
gtest/make/gtest.a: gtest
$(MAKE) -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest.a
gtest:
curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | tar xz && \
mv googletest-release-1.8.0/googletest gtest && \
rm -rf googletest-release-1.8.0
clean:
rm -f *.o demo_ivfpq_indexing_gpu
rm -f *.o $(TESTS_BIN)
rm -rf gtest
rm -f demo_ivfpq_indexing_gpu
.PHONY: clean
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "DeviceUtils.h"
#include "../../FaissAssert.h"
#include <mutex>
#include <unordered_map>
namespace faiss { namespace gpu {
int getCurrentDevice() {
int dev = -1;
CUDA_VERIFY(cudaGetDevice(&dev));
FAISS_ASSERT(dev != -1);
return dev;
}
void setCurrentDevice(int device) {
CUDA_VERIFY(cudaSetDevice(device));
}
int getNumDevices() {
int numDev = -1;
CUDA_VERIFY(cudaGetDeviceCount(&numDev));
FAISS_ASSERT(numDev != -1);
return numDev;
}
void synchronizeAllDevices() {
for (int i = 0; i < getNumDevices(); ++i) {
DeviceScope scope(i);
CUDA_VERIFY(cudaDeviceSynchronize());
}
}
const cudaDeviceProp& getDeviceProperties(int device) {
static std::mutex mutex;
static std::unordered_map<int, cudaDeviceProp> properties;
std::lock_guard<std::mutex> guard(mutex);
auto it = properties.find(device);
if (it == properties.end()) {
cudaDeviceProp prop;
CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
properties[device] = prop;
it = properties.find(device);
}
return it->second;
}
const cudaDeviceProp& getCurrentDeviceProperties() {
return getDeviceProperties(getCurrentDevice());
}
int getMaxThreads(int device) {
return getDeviceProperties(device).maxThreadsPerBlock;
}
int getMaxThreadsCurrentDevice() {
return getMaxThreads(getCurrentDevice());
}
size_t getMaxSharedMemPerBlock(int device) {
return getDeviceProperties(device).sharedMemPerBlock;
}
size_t getMaxSharedMemPerBlockCurrentDevice() {
return getMaxSharedMemPerBlock(getCurrentDevice());
}
int getDeviceForAddress(const void* p) {
if (!p) {
return -1;
}
cudaPointerAttributes att;
cudaError_t err = cudaPointerGetAttributes(&att, p);
FAISS_ASSERT(err == cudaSuccess ||
err == cudaErrorInvalidValue);
if (err == cudaErrorInvalidValue) {
// Make sure the current thread error status has been reset
err = cudaGetLastError();
FAISS_ASSERT(err == cudaErrorInvalidValue);
return -1;
} else if (att.memoryType == cudaMemoryTypeHost) {
return -1;
} else {
return att.device;
}
}
bool getFullUnifiedMemSupport(int device) {
const auto& prop = getDeviceProperties(device);
return (prop.major >= 6);
}
bool getFullUnifiedMemSupportCurrentDevice() {
return getFullUnifiedMemSupport(getCurrentDevice());
}
DeviceScope::DeviceScope(int device) {
prevDevice_ = getCurrentDevice();
if (prevDevice_ != device) {
setCurrentDevice(device);
} else {
prevDevice_ = -1;
}
}
DeviceScope::~DeviceScope() {
if (prevDevice_ != -1) {
setCurrentDevice(prevDevice_);
}
}
CublasHandleScope::CublasHandleScope() {
auto blasStatus = cublasCreate(&blasHandle_);
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
CublasHandleScope::~CublasHandleScope() {
auto blasStatus = cublasDestroy(blasHandle_);
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
CudaEvent::CudaEvent(cudaStream_t stream)
: event_(0) {
CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
CUDA_VERIFY(cudaEventRecord(event_, stream));
}
CudaEvent::CudaEvent(CudaEvent&& event) noexcept
: event_(std::move(event.event_)) {
event.event_ = 0;
}
CudaEvent::~CudaEvent() {
if (event_) {
CUDA_VERIFY(cudaEventDestroy(event_));
}
}
CudaEvent&
CudaEvent::operator=(CudaEvent&& event) noexcept {
event_ = std::move(event.event_);
event.event_ = 0;
return *this;
}
void
CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
}
void
CudaEvent::cpuWaitOnEvent() {
CUDA_VERIFY(cudaEventSynchronize(event_));
}
} } // namespace
@@ -6,27 +6,20 @@
CXX = @CXX@
CXXCPP = @CXXCPP@
# TODO: Investigate the LAPACKE wrapper for LAPACK, which defines the correct
# type for FORTRAN integers.
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@
CXXFLAGS = -fPIC @OPENMP_CXXFLAGS@ @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@ @OPENMP_CXXFLAGS@ @NVCC_CPPFLAGS@
CXXFLAGS = -fPIC @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = @ARCH_CPUFLAGS@
LDFLAGS = @OPENMP_LDFLAGS@ @LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@
LDFLAGS = @OPENMP_CXXFLAGS@ @LDFLAGS@ @NVCC_LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@ @NVCC_LIBS@
PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@
NVCC = @NVCC@
NVCCLDFLAGS = @NVCC_LDFLAGS@
NVCCLIBS = @NVCC_LIBS@
CUDAROOT = @CUDA_PREFIX@
CUDACFLAGS = @NVCC_CPPFLAGS@
NVCCFLAGS = -I $(CUDAROOT)/targets/x86_64-linux/include/ \
CUDA_ROOT = @CUDA_PREFIX@
CUDA_ARCH = @CUDA_ARCH@
NVCCFLAGS = -I $(CUDA_ROOT)/targets/x86_64-linux/include/ \
-Xcompiler -fPIC \
-Xcudafe --diag_suppress=unrecognized_attribute \
-gencode arch=compute_35,code="compute_35" \
-gencode arch=compute_52,code="compute_52" \
-gencode arch=compute_60,code="compute_60" \
-gencode arch=compute_61,code="compute_61" \
$(CUDA_ARCH) \
-lineinfo \
-ccbin $(CXX) -DFAISS_USE_FLOAT16
@@ -43,6 +36,7 @@ endif
MKDIR_P = @MKDIR_P@
PYTHON = @PYTHON@
SWIG = @SWIG@
AR ?= ar
prefix ?= @prefix@
exec_prefix ?= @exec_prefix@
@@ -6,52 +6,33 @@
-include ../makefile.inc
HEADERS = $(wildcard ../*.h)
ifneq ($(strip $(NVCC)),)
SWIGFLAGS = -DGPU_WRAPPER
endif
all: cpu build
#############################
# CPU
cpu: _swigfaiss.so
all: build
# Also silently generates swigfaiss.py.
swigfaiss_wrap.cpp: swigfaiss.swig $(HEADERS)
$(SWIG) -python -c++ -Doverride= -I../ -o $@ $<
swigfaiss.cpp: swigfaiss.swig ../libfaiss.a
$(SWIG) -python -c++ -Doverride= -I../ $(SWIGFLAGS) -o $@ $<
swigfaiss_wrap.o: swigfaiss_wrap.cpp
$(CXX) $(CXXFLAGS) $(CPUFLAGS) $(PYTHONCFLAGS) -I../ -c $< -o $@
swigfaiss.o: swigfaiss.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) $(PYTHONCFLAGS) \
-I../ -c $< -o $@
# Extension is .so even on OSX.
_swigfaiss.so: swigfaiss_wrap.o ../libfaiss.a
_swigfaiss.so: swigfaiss.o ../libfaiss.a
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
#############################
# GPU
gpu: _swigfaiss_gpu.so
# Also silently generates swigfaiss.py.
swigfaiss_gpu_wrap.cpp: swigfaiss.swig
$(SWIG) -python -c++ -Doverride= -I../ -DGPU_WRAPPER -o $@ $<
swigfaiss_gpu_wrap.o: swigfaiss_gpu_wrap.cpp
$(NVCC) $(NVCCFLAGS) $(PYTHONCFLAGS) -I../ -c $< -o $@
_swigfaiss_gpu.so: swigfaiss_gpu_wrap.o ../gpu/libgpufaiss.a ../libfaiss.a
$(CXX) $(SHAREDFLAGS) $(NVCCLDFLAGS) $(LDFLAGS) -o $@ $^ $(NVCCLIBS) $(LIBS)
build: cpu
build: _swigfaiss.so faiss.py
$(PYTHON) setup.py build
install: build
$(PYTHON) setup.py install
clean:
rm -f swigfaiss_wrap.cpp swigfaiss_gpu_wrap.cpp
rm -f swigfaiss.py swigfaiss_gpu.py
rm -f _swigfaiss.so _swigfaiss_gpu.so
rm -f swigfaiss.{cpp,o,py}
rm -f _swigfaiss.so
rm -rf build/
.PHONY: all build clean cpu gpu install
.PHONY: all build clean install
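After this simplification, building and sanity-checking the Python module from the repository root is just (assuming libfaiss.a has already been built):

    make -C python
    PYTHONPATH=python/build/`ls python/build | grep lib` \
      python -c "import faiss; print(faiss.__version__)"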
@@ -16,16 +16,7 @@ import pdb
# we import * so that the symbol X can be accessed as faiss.X
try:
from swigfaiss_gpu import *
except ImportError as e:
if 'No module named' not in e.args[0]:
# swigfaiss_gpu is there but failed to load: Warn user about it.
sys.stderr.write("Failed to load GPU Faiss: %s\n" % e.args[0])
sys.stderr.write("Faiss falling back to CPU-only.\n")
from swigfaiss import *
from .swigfaiss import *
__version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR,
FAISS_VERSION_MINOR,
@@ -8,8 +8,7 @@ here = os.path.abspath(os.path.dirname(__file__))
check_fpath = os.path.join("_swigfaiss.so")
if not os.path.exists(check_fpath):
print("Could not find {}".format(check_fpath))
print("Have you run `make` and `make py` "
"(and optionally `cd gpu && make && make py && cd ..`)?")
print("Have you run `make` and `make -C python`?")
# make the faiss python package dir
shutil.rmtree("faiss", ignore_errors=True)
@@ -17,11 +16,6 @@ os.mkdir("faiss")
shutil.copyfile("faiss.py", "faiss/__init__.py")
shutil.copyfile("swigfaiss.py", "faiss/swigfaiss.py")
shutil.copyfile("_swigfaiss.so", "faiss/_swigfaiss.so")
try:
shutil.copyfile("_swigfaiss_gpu.so", "faiss/_swigfaiss_gpu.so")
shutil.copyfile("swigfaiss_gpu.py", "faiss/swigfaiss_gpu.py")
except:
pass
long_description="""
Faiss is a library for efficient similarity search and clustering of dense
@@ -33,11 +27,11 @@ are implemented on the GPU. It is developed by Facebook AI Research.
"""
setup(
name='faiss',
version='0.1',
version='1.5.1',
description='A library for efficient similarity search and clustering of dense vectors',
long_description=long_description,
url='https://github.com/facebookresearch/faiss',
author='Matthijs Douze, Jeff Johnson, Herve Jegou',
author='Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini',
author_email='matthijs@fb.com',
license='BSD',
keywords='search nearest neighbors',
@@ -15,11 +15,7 @@
// SWIGPYTHON: Python-specific code
// GPU_WRAPPER: also compile interfaces for GPU.
#ifdef GPU_WRAPPER
%module swigfaiss_gpu;
#else
%module swigfaiss;
#endif
// fbode SWIG fails on warnings, so make them non fatal
#pragma SWIG nowarn=321
@@ -328,16 +324,6 @@ int get_num_gpus()
%include "gpu/GpuIndexBinaryFlat.h"
%include "gpu/GpuDistance.h"
#ifdef SWIGLUA
/// in Lua, swigfaiss_gpu is known as swigfaiss
%luacode {
local swigfaiss = swigfaiss_gpu
}
#endif
#endif
@@ -7,21 +7,22 @@
-include ../makefile.inc
TESTS_SRC = $(wildcard *.cpp)
TESTS = $(TESTS_SRC:.cpp=.o)
TESTS_OBJ = $(TESTS_SRC:.cpp=.o)
all: run
run: tests
./tests
tests: $(TESTS) ../libfaiss.a gtest/make/gtest_main.a
tests: $(TESTS_OBJ) ../libfaiss.a gtest/make/gtest_main.a
$(CXX) -o $@ $^ $(LDFLAGS) $(LIBS)
%.o: %.cpp gtest
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c -o $@ $< -Igtest/include -I../..
gtest/make/gtest_main.a: gtest
make -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest_main.a
$(MAKE) -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest_main.a
gtest:
curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | tar xz && \
@@ -29,8 +30,8 @@ gtest:
rm -rf googletest-release-1.8.0
clean:
rm -f test_runner
rm -f *.o
rm -f tests
rm -f $(TESTS_OBJ)
rm -rf gtest