Unverified commit 7f5b22b0 authored by Lucas Hosseini, committed by GitHub

Add conda packages metadata + tests. (#769)

+ Add conda packages metadata (now building Faiss using conda's toolchain);
+ add Dockerfile for building conda packages (for all CUDA versions);
+ add working Dockerfile building faiss on Centos7;
+ simplify GPU build;
+ avoid falling back to CPU-only version (python);
+ simplify TravisCI config;
+ update INSTALL.md;
+ add configure flag for specifying target architectures (--with-cuda-arch);
+ fix Makefile for gpu tests;
+ fix various Makefile issues;
+ remove stale file (gpu/utils/DeviceUtils.cpp).
parent 6c1cb3cd
......@@ -15,7 +15,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
compiler: gcc
addons:
......@@ -25,7 +27,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
compiler: gcc
addons:
......@@ -35,7 +39,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
- os: linux
compiler: clang
addons:
......@@ -45,8 +51,9 @@ matrix:
- liblapack-dev
- python-numpy
- python-dev
# - swig
# - swig3.0
env:
- PYTHON_CFLAGS="-I/usr/include/python2.7"
# NOTE: Hack, c.f. https://github.com/travis-ci/travis-ci/issues/8613
- LD_LIBRARY_PATH="/usr/local/clang/lib"
- os: osx
......@@ -69,8 +76,9 @@ install:
- ./.travis/install.sh
- aclocal
- autoconf
- ./configure
- ./configure --without-cuda
- make
- make -C python
script:
- make test
......@@ -7,7 +7,7 @@ function installswig() {
cd /tmp/ &&
wget https://github.com/swig/swig/archive/rel-3.0.12.tar.gz &&
tar zxf rel-3.0.12.tar.gz && cd swig-rel-3.0.12 &&
./autogen.sh && ./configure --prefix "${HOME}"/swig/ 1>/dev/null &&
./autogen.sh && ./configure --prefix "${HOME}" 1>/dev/null &&
make >/dev/null &&
make install >/dev/null
}
......
FROM nvidia/cuda:8.0-devel-ubuntu16.04
MAINTAINER Pierre Letessier <pletessier@ina.fr>
FROM nvidia/cuda:8.0-devel-centos7
RUN apt-get update -y
RUN apt-get install -y libopenblas-dev python-numpy python-dev swig git python-pip wget
# Install MKL
RUN yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo
RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
RUN yum install -y intel-mkl-2019.3-062
ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
ENV LD_PRELOAD /usr/lib64/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
/opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
RUN pip install --upgrade pip
RUN pip install matplotlib
# Install necessary build tools
RUN yum install -y gcc-c++ make swig3
# Install necessary headers/libs
RUN yum install -y python-devel numpy
COPY . /opt/faiss
WORKDIR /opt/faiss
ENV BLASLDFLAGS /usr/lib/libopenblas.so.0
RUN mv example_makefiles/makefile.inc.Linux ./makefile.inc
RUN make tests/test_blas -j $(nproc) && \
make -j $(nproc) && \
make demos/demo_sift1M -j $(nproc) && \
make py
RUN cd gpu && \
make -j $(nproc) && \
make test/demo_ivfpq_indexing_gpu && \
make py
ENV PYTHONPATH $PYTHONPATH:/opt/faiss
# RUN ./tests/test_blas && \
# tests/demo_ivfpq_indexing
# RUN wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz && \
# tar xf sift.tar.gz && \
# mv sift sift1M
# RUN tests/demo_sift1M
# --with-cuda=/usr/local/cuda-8.0
RUN ./configure --without-cuda
RUN make -j $(nproc)
RUN make -C python
RUN make test
RUN make install
RUN make -C demos demo_ivfpq_indexing && \
LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ./demos/demo_ivfpq_indexing
......@@ -8,39 +8,32 @@ INSTALL file for Faiss (Fair AI Similarity Search)
Install via Conda
-----------------
The easiest way to install FAISS is from anaconda. We regularly push stable releases to conda channel. FAISS conda package is built with conda gcc, depends on libgcc, mkl and numpy package shipped in conda in runtime.
The easiest way to install FAISS is from Anaconda. We regularly push stable releases to the pytorch conda channel.
Currently we support faiss-cpu on both Linux and OSX platforms. We also provide faiss-gpu compiled with CUDA8.0/CUDA9.0/CUDA9.2 on Linux systems.
Currently we support faiss-cpu on both Linux and OSX. We also provide faiss-gpu compiled with CUDA8/CUDA9/CUDA10 on Linux systems.
You can easily install it by
```
# CPU version only
conda install faiss-cpu -c pytorch
# Make sure you have CUDA installed before installing faiss-gpu, otherwise it falls back to CPU version
conda install faiss-gpu -c pytorch # [DEFAULT]For CUDA8.0
conda install faiss-gpu cuda90 -c pytorch # For CUDA9.0
conda install faiss-gpu cuda92 -c pytorch # For CUDA9.2
# cuda90/cuda91 shown above is a feature, it doesn't install CUDA for you.
# GPU version
conda install faiss-gpu cudatoolkit=8.0 -c pytorch # For CUDA8
conda install faiss-gpu cudatoolkit=9.0 -c pytorch # For CUDA9
conda install faiss-gpu cudatoolkit=10.0 -c pytorch # For CUDA10
```
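To check that an installed package works, here is a minimal sketch along the lines of the recipe's own `run_test` script (the dimension and number of vectors below are arbitrary):

```python
import faiss                    # the conda package exposes the standard Python API
import numpy as np

d = 64                          # vector dimension, arbitrary for this check
rs = np.random.RandomState(1234)
xb = rs.rand(1000, d).astype(np.float32)   # database vectors

index = faiss.IndexFlatL2(d)    # exact L2 index
index.add(xb)
D, I = index.search(xb[:5], 4)  # 4 nearest neighbours of the first 5 vectors
print(I)                        # row i should start with i: each vector is its own NN
```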
Compile from source
-------------------
The Faiss compilation works in 3 steps, from easiest to most
involved:
The Faiss compilation works in 2 steps:
1. compile the C++ core and examples
2. compile the Python interface
3. compile GPU part
Steps 2 and 3 depend on 1, but they are otherwise independent.
Alternatively, all 3 steps above can be run by building a Docker image (see
section "Docker instructions" below).
Step 2 depends on step 1.
It is also possible to build a pure C interface. This optional process is
described separately (please see the [C interface installation file](c_api/INSTALL.md))
......@@ -48,20 +41,48 @@ described separately (please see the [C interface installation file](c_api/INSTA
General compilation instructions
================================
Faiss has been tested only on x86_64 machines on Linux and Mac OS.
TL;DR: `./configure && make (&& make install)` for the C++ library, and then `cd python; make && make install` for the python interface.
1. `./configure`
This generates the system-dependent configuration for the `Makefile`, stored in
a file called `makefile.inc`.
A few useful options:
- `./configure --without-cuda` in order to build the CPU part only.
- `./configure --with-cuda=/path/to/cuda-10.1` in order to point to the path of
the CUDA toolkit.
- `./configure --with-cuda-arch="-gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_72,code=sm_72"` for specifying which GPU architectures to build against.
- `./configure --with-python=/path/to/python3.7` in order to build a python
interface for a different python than the default one.
2. `make`
This builds the C++ library (the whole library if a suitable cuda toolkit was
found, or the CPU part only otherwise).
3. `make install` (optional)
This installs the headers and libraries.
Faiss is compiled via a Makefile. The system-dependent configuration
of the Makefile is in an include file, makefile.inc. The variables in
makefile.inc must be set by hand.
4. `make -C python` (or `make py`)
This builds the python interface.
5. `make -C python install`
This installs the python library.
Faiss has been tested only on x86_64 machines on Linux and Mac OS.
Faiss requires a C++ compiler that understands:
- the Intel intrinsics for SSE instructions
- the GCC intrinsic for the popcount instruction
- basic OpenMP
- the Intel intrinsics for SSE instructions,
- the GCC intrinsic for the popcount instruction,
- basic OpenMP.
There are a few models for makefile.inc in the example_makefiles/
subdirectory. Copy the relevant one for your system and adjust to your
needs. There are also indications for specific configurations in the
There are a few examples for makefile.inc in the example_makefiles/
subdirectory. There are also indications for specific configurations in the
troubleshooting section of the wiki.
https://github.com/facebookresearch/faiss/wiki/Troubleshooting
......@@ -70,14 +91,6 @@ Faiss comes as a .a archive, that can be linked with executables or
dynamic libraries (useful for the Python wrapper).
Step 1: Compiling the C++ Faiss
===============================
TL;DR: `./configure && make && make install`
The CPU version of Faiss is written in C++11.
BLAS/Lapack
-----------
......@@ -103,17 +116,6 @@ and run
`./misc/test_blas`
Building faiss
-------------
Once the proper BLAS flags are set, the library should compile
smoothly by running
`make`
Then, in order to install the library and the headers, run
`make install`
Testing Faiss
-------------
......@@ -122,13 +124,18 @@ A basic usage example is in
`demos/demo_ivfpq_indexing`
it makes a small index, stores it and performs some searches. A normal
which you can build by calling
`make -C demos demo_ivfpq_indexing`
It makes a small index, stores it and performs some searches. A normal
runtime is around 20s. With a fast machine and Intel MKL's BLAS it
runs in 2.5s.
To run the whole test suite:
`make test`
`make test` (for the CPU part)
`make test_gpu` (for the GPU part)
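For reference, the Python part of `make test` boils down to `unittest` discovery over `tests/`; a rough standalone equivalent, assuming the built extension module is importable, is:

```python
# Rough Python equivalent of the Makefile's
#   $(PYTHON) -m unittest discover tests/ -v
# invocation; run from the repository root with the built module on the PYTHONPATH.
import unittest

suite = unittest.defaultTestLoader.discover("tests/")
unittest.TextTestRunner(verbosity=2).run(suite)
```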
A real-life benchmark
......@@ -154,16 +161,12 @@ setting a different index_key to find the indexing structure that
gives the best performance.
Step 2: Compiling the Python interface
The Python interface
======================================
The Python interface is compiled with
`make py`
If you want to compile it for another python version than the default
Python 2.7, in particular Python 3, the PYTHONCFLAGS must be adjusted in
makefile.inc, see the examples.
`make -C python` (or `make py`)
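Assuming the build succeeded and the generated module is importable (for instance with `python/build/lib*` on the PYTHONPATH, as the `test` target arranges), a quick smoke test could be:

```python
import faiss

print(faiss.__version__)        # version string assembled from FAISS_VERSION_*
index = faiss.IndexFlatL2(32)   # tiny index, just to exercise the bindings
print(index.is_trained, index.ntotal)   # expect: True 0 for a fresh flat index
```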
How it works
------------
......@@ -172,18 +175,7 @@ The Python interface is provided via SWIG (Simple Wrapper and
Interface Generator) and an additional level of manual wrappers (in python/faiss.py).
SWIG generates two wrapper files: a Python file (`python/swigfaiss.py`) and a
C++ file that must be compiled to a dynamic library (`python/_swigfaiss.so`). These
files are included in the repository, so running swig is only required when
the C++ headers of Faiss are changed.
The C++ compilation to the dynamic library requires to set:
- `SHAREDFLAGS`: system-specific flags to generate a dynamic library
- `PYTHONCFLAGS`: include flags for Python
See the example makefile.inc's on how to set the flags.
C++ file that must be compiled to a dynamic library (`python/_swigfaiss.so`).
Testing the Python wrapper
--------------------------
......@@ -227,23 +219,21 @@ operating points. You can play around with the types of indexes.
Step 3: Compiling the GPU implementation
========================================
There is a GPU-specific Makefile in the `gpu/` directory. It depends on
the same ../makefile.inc for system-specific variables. You need
libfaiss.a from Step 1 for this to work.
The GPU version is a superset of the CPU version. In addition it
requires the cuda compiler and related libraries (Cublas)
See the example makefile on how to set the flags.
The nvcc-specific flags to pass to the compiler, based on your desired
compute capability. Only compute capability 3.5+ is supported. For
example, we enable by default:
compute capability, can be customized by passing the `--with-cuda-arch` option to
`./configure`. Only compute capability 3.5+ is supported. For example, we enable
by default:
```
-gencode arch=compute_35,code="compute_35"
-gencode arch=compute_52,code="compute_52"
-gencode arch=compute_60,code="compute_60"
-gencode=arch=compute_35,code=compute_35
-gencode=arch=compute_52,code=compute_52
-gencode=arch=compute_60,code=compute_60
-gencode=arch=compute_61,code=compute_61
-gencode=arch=compute_70,code=compute_70
-gencode=arch=compute_75,code=compute_75
```
However, look at https://developer.nvidia.com/cuda-gpus to determine
......@@ -256,10 +246,6 @@ nvcc, except some of them that are not recognized and that should be
escaped by prefixing them with -Xcompiler. Also link flags that are
prefixed with -Wl, should be passed with -Xlinker.
Then compile with
`cd gpu; make`
You may want to add `-j 10` to use 10 threads during compile.
Testing the GPU implementation
......@@ -267,30 +253,12 @@ Testing the GPU implementation
Compile the example with
`cd gpu; make tests/demo_ivfpq_indexing_gpu`
`make -C gpu/test demo_ivfpq_indexing_gpu`
This produces the GPU code equivalent to the CPU
demo_ivfpq_indexing. It also shows how to translate indexes from/to
the GPU.
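From Python, the same translation is a one-liner in each direction; the sketch below mirrors the conda package's GPU `run_test` (the sizes and the device number 0 are illustrative):

```python
import faiss
import numpy as np

d = 128
x = np.random.RandomState(1337).rand(100, d).astype(np.float32)

cpu_index = faiss.IndexFlatL2(d)
res = faiss.StandardGpuResources()                     # per-process GPU resources
gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)  # copy the index to GPU 0
gpu_index.add(x)
D, I = gpu_index.search(x, 10)                         # search runs on the GPU

cpu_copy = faiss.index_gpu_to_cpu(gpu_index)           # and back to the CPU
```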
Compiling the Python interface with GPU support
-----------------------------------------------
Given step 2, adding support of the GPU from Python is quite
straightforward. Run
`cd python; make _swigfaiss_gpu.so`
The import is the same for the GPU version and the CPU-only
version.
`python -c "import faiss"`
Faiss tries to load the GPU version first, and in case of failure,
loads the CPU-only version. To investigate more closely the cause of
a failure, you can run:
`python -c "import _swigfaiss_gpu"`
Python example with GPU support
-------------------------------
......@@ -357,7 +325,7 @@ libfaiss.so (or libfaiss.dylib)
the executable should be linked to one of these. If you use
the static version (.a), add the LDFLAGS used in the Makefile.
For binary-only distributions, the include files should be under
For binary-only distributions, the headers should be under
a `faiss/` directory, so that they can be included as
```c++
......@@ -371,12 +339,12 @@ Python
To import Faiss in your own Python project, you need the files
```
faiss.py
swigfaiss.py / swigfaiss_gpu.py
_swigfaiss.so / _swigfaiss_gpu.so
__init__.py
swigfaiss.py
_swigfaiss.so
```
to be visible in the PYTHONPATH or in the current directory.
to be present in a `faiss/` directory visible in the PYTHONPATH or in the
current directory.
Then Faiss can be used in python with
```python
......
......@@ -18,8 +18,8 @@
#include <sstream>
#define FAISS_VERSION_MAJOR 1
#define FAISS_VERSION_MINOR 4
#define FAISS_VERSION_PATCH 0
#define FAISS_VERSION_MINOR 5
#define FAISS_VERSION_PATCH 1
/**
* @namespace faiss
......
......@@ -6,19 +6,35 @@
-include makefile.inc
SRC=$(wildcard *.cpp)
OBJ=$(SRC:.cpp=.o)
HEADERS = $(wildcard *.h)
SRC = $(wildcard *.cpp)
OBJ = $(SRC:.cpp=.o)
INSTALLDIRS = $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss
GPU_HEADERS = $(wildcard gpu/*.h gpu/impl/*.h gpu/utils/*.h)
GPU_CPPSRC = $(wildcard gpu/*.cpp gpu/impl/*.cpp gpu/utils/*.cpp)
GPU_CUSRC = $(wildcard gpu/*.cu gpu/impl/*.cu gpu/utils/*.cu \
gpu/utils/nvidia/*.cu gpu/utils/blockselect/*.cu gpu/utils/warpselect/*.cu)
GPU_SRC = $(GPU_CPPSRC) $(GPU_CUSRC)
GPU_CPPOBJ = $(GPU_CPPSRC:.cpp=.o)
GPU_CUOBJ = $(GPU_CUSRC:.cu=.o)
GPU_OBJ = $(GPU_CPPOBJ) $(GPU_CUOBJ)
GPU_INSTALLDIRS = $(DESTDIR)$(includedir)/faiss/gpu/{impl,utils}
ifneq ($(strip $(NVCC)),)
OBJ += $(GPU_OBJ)
INSTALLDIRS += $(GPU_INSTALLDIRS)
HEADERS += $(GPU_HEADERS)
endif
############################
# Building
default: libfaiss.a
all: libfaiss.a libfaiss.$(SHAREDEXT)
libfaiss.a: $(OBJ)
ar r $@ $^
$(AR) r $@ $^
libfaiss.$(SHAREDEXT): $(OBJ)
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
......@@ -26,8 +42,11 @@ libfaiss.$(SHAREDEXT): $(OBJ)
%.o: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c $< -o $@
%.o: %.cu
$(NVCC) $(NVCCFLAGS) -g -O3 -c $< -o $@
clean:
rm -f libfaiss.*
rm -f libfaiss.{a,$(SHAREDEXT)}
rm -f $(OBJ)
......@@ -35,15 +54,14 @@ clean:
# Installing
install: libfaiss.a libfaiss.$(SHAREDEXT) installdirs
cp libfaiss.a libfaiss.$(SHAREDEXT) $(DESTDIR)$(libdir)
cp *.h $(DESTDIR)$(includedir)/faiss/
cp libfaiss.{a,$(SHAREDEXT)} $(DESTDIR)$(libdir)
tar cf - $(HEADERS) | tar xf - -C $(DESTDIR)$(includedir)/faiss/
installdirs:
$(MKDIR_P) $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss
$(MKDIR_P) $(INSTALLDIRS)
uninstall:
rm $(DESTDIR)$(libdir)/libfaiss.a
rm $(DESTDIR)$(libdir)/libfaiss.$(SHAREDEXT)
rm -f $(DESTDIR)$(libdir)/libfaiss.{a,$(SHAREDEXT)}
rm -rf $(DESTDIR)$(includedir)/faiss
......@@ -52,41 +70,44 @@ uninstall:
-include depend
# The above makefile.dep is generated by the following target:
depend:
for i in $(SRC); do \
$(CXXCPP) $(CPPFLAGS) -MM $$i; \
depend: $(SRC) $(GPU_SRC)
for i in $^; do \
$(CXXCPP) $(CPPFLAGS) -x c++ -MM $$i; \
done > depend
#############################
# Python
py: libfaiss.a
$(MAKE) -C python
#############################
# Tests
test: libfaiss.a py
make -C tests run
$(MAKE) -C tests run
PYTHONPATH=./python/build/`ls python/build | grep lib` \
$(PYTHON) -m unittest discover tests/ -v
test_gpu: libfaiss.a
$(MAKE) -C gpu/test run
PYTHONPATH=./python/build/`ls python/build | grep lib` \
$(PYTHON) -m unittest discover gpu/test/ -v
#############################
# Demos
demos: libfaiss.a
make -C demos
$(MAKE) -C demos
#############################
# Misc
misc/test_blas: misc/test_blas.cpp
$(CXX) $(CXXFLAG) $(LDFLAGS) -o $@ $^ $(LIBS)
#############################
# Python
py:
$(MAKE) -C python build
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
.PHONY: all clean default demos install installdirs py test uninstall
.PHONY: all clean demos install installdirs py test test_gpu uninstall
AC_DEFUN([FA_CHECK_CUDA], [
AC_ARG_WITH(cuda,
[AS_HELP_STRING([--with-cuda=<prefix>], [prefix of the CUDA installation])])
case $with_cuda in
"") cuda_prefix=/usr/local/cuda ;;
*) cuda_prefix="$with_cuda"
esac
[AS_HELP_STRING([--with-cuda=<prefix>], [prefix of the CUDA installation])])
AC_ARG_WITH(cuda-arch,
[AS_HELP_STRING([--with-cuda-arch=<gencodes>], [device specific -gencode flags])],
[],
[with_cuda_arch=default])
AC_CHECK_PROG(NVCC, "nvcc", "$cuda_prefix/bin/nvcc", "", "$cuda_prefix/bin")
fa_nvcc_bin=$NVCC
if test x$with_cuda != xno; then
if test x$with_cuda != x; then
cuda_prefix=$with_cuda
AC_CHECK_PROG(NVCC, [nvcc], [$cuda_prefix/bin/nvcc], [], [$cuda_prefix/bin])
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
AC_CHECK_PROGS(NVCC, [nvcc /usr/local/cuda/bin/nvcc], [])
if test "x$NVCC" == "x/usr/local/cuda/bin/nvcc"; then
cuda_prefix="/usr/local/cuda"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
cuda_prefix=""
NVCC_CPPFLAGS=""
NVCC_LDFLAGS=""
fi
fi
if test "x$NVCC" == x; then
AC_MSG_ERROR([Couldn't find nvcc])
fi
if test "x$with_cuda_arch" == xdefault; then
with_cuda_arch="-gencode=arch=compute_35,code=compute_35 \\
-gencode=arch=compute_52,code=compute_52 \\
-gencode=arch=compute_60,code=compute_60 \\
-gencode=arch=compute_61,code=compute_61 \\
-gencode=arch=compute_70,code=compute_70 \\
-gencode=arch=compute_75,code=compute_75"
fi
if test x$fa_nvcc_bin != x; then
fa_save_CPPFLAGS="$CPPFLAGS"
fa_save_LDFLAGS="$LDFLAGS"
fa_save_LIBS="$LIBS"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
CPPFLAGS="$NVCC_CPPFLAGS $CPPFLAGS"
LDFLAGS="$NVCC_LDFLAGS $LDFLAGS"
AC_CHECK_HEADER([cuda.h], [], AC_MSG_FAILURE([Couldn't find cuda.h]))
AC_CHECK_LIB([cuda], [cuInit], [], AC_MSG_FAILURE([Couldn't find libcuda]))
AC_CHECK_LIB([cublas], [cublasAlloc], [], AC_MSG_FAILURE([Couldn't find libcublas]))
AC_CHECK_LIB([cudart], [cudaSetDevice], [], AC_MSG_FAILURE([Couldn't find libcudart]))
......@@ -32,8 +56,6 @@ if test x$fa_nvcc_bin != x; then
CPPFLAGS="$fa_save_CPPFLAGS"
LDFLAGS="$fa_save_LDFLAGS"
LIBS="$fa_save_LIBS"
else
cuda_prefix=""
fi
AC_SUBST(NVCC)
......@@ -41,4 +63,5 @@ AC_SUBST(NVCC_CPPFLAGS)
AC_SUBST(NVCC_LDFLAGS)
AC_SUBST(NVCC_LIBS)
AC_SUBST(CUDA_PREFIX, $cuda_prefix)
AC_SUBST(CUDA_ARCH, $with_cuda_arch)
])
......@@ -3,10 +3,9 @@ AC_DEFUN([FA_PROG_SWIG], [
AC_ARG_WITH(swig,
[AS_HELP_STRING([--with-swig=<bin>], [use SWIG binary <bin>])])
case $with_swig in
"") SWIG_BIN=swig ;;
*) SWIG_BIN="$with_swig"
"") AC_CHECK_PROG(SWIG, swig, swig);;
*) SWIG="$with_swig"
esac
AC_CHECK_PROG(SWIG, $SWIG_BIN, $SWIG_BIN)
AC_SUBST(SWIG)
])
......@@ -10,36 +10,12 @@ esac
AC_CHECK_PROG(PYTHON, $PYTHON_BIN, $PYTHON_BIN)
fa_python_bin=$PYTHON
AC_ARG_WITH(python-config,
[AS_HELP_STRING([--with-python-config=<bin>], [use Python config binary <bin>])])
case $with_python_config in
"") PYTHON_CFG_BIN="${PYTHON_BIN}-config" ;;
*) PYTHON_CFG_BIN="$with_python_config"
esac
AC_CHECK_PROG(PYTHON_CFG, $PYTHON_CFG_BIN, $PYTHON_CFG_BIN)
fa_python_cfg_bin=$PYTHON_CFG
if test x$fa_python_cfg_bin != x; then
AC_MSG_CHECKING([for Python C flags])
fa_python_cflags=`${PYTHON_CFG} --includes`
if test x"$fa_python_cflags" == x; then
AC_MSG_RESULT([not found])
AC_MSG_WARN([You won't be able to build the python interface.])
else
AC_MSG_RESULT($fa_python_cflags)
AC_SUBST(PYTHON_CFLAGS, $fa_python_cflags)
fi
AC_MSG_CHECKING([for Python C flags])
fa_python_cflags=`$PYTHON -c "
import sysconfig
paths = [['-I' + sysconfig.get_path(p) for p in ['include', 'platinclude']]]
print(' '.join(paths))"`
AC_MSG_RESULT($fa_python_cflags)
AC_SUBST(PYTHON_CFLAGS, "$PYTHON_CFLAGS $fa_python_cflags")
AC_MSG_CHECKING([for Python ld flags])
fa_python_ldflags=`${PYTHON_CFG} --ldflags`
if test x"$fa_python_ldflags" == x; then
AC_MSG_RESULT([not found])
else
AC_MSG_RESULT($fa_python_ldflags)
AC_SUBST(PYTHON_LDFLAGS, $fa_python_ldflags)
fi
else
AC_MSG_WARN([You won't be able to build the python interface.])
fi
])
])dnl FA_PYTHON
FROM soumith/conda-cuda:latest
COPY ./ faiss
WORKDIR /faiss/conda
ENV FAISS_BUILD_VERSION 1.5.1
ENV FAISS_BUILD_NUMBER 1
RUN conda build faiss --no-anaconda-upload -c pytorch
RUN CUDA_ROOT=/usr/local/cuda-8.0 \
CUDA_ARCH="-gencode=arch=compute_35,code=compute_35 \
-gencode=arch=compute_52,code=compute_52 \
-gencode=arch=compute_60,code=compute_60 \
-gencode=arch=compute_61,code=compute_61" \
conda build faiss-gpu --variants '{ "cudatoolkit": "8.0" }' \
--no-anaconda-upload -c pytorch --no-test
RUN CUDA_ROOT=/usr/local/cuda-9.0 \
CUDA_ARCH="-gencode=arch=compute_35,code=compute_35 \
-gencode=arch=compute_52,code=compute_52 \
-gencode=arch=compute_60,code=compute_60 \
-gencode=arch=compute_61,code=compute_61 \
-gencode=arch=compute_70,code=compute_70" \
conda build faiss-gpu --variants '{ "cudatoolkit": "9.0" }' \
--no-anaconda-upload -c pytorch --no-test
RUN CUDA_ROOT=/usr/local/cuda-10.0 \
CUDA_ARCH="-gencode=arch=compute_35,code=compute_35 \
-gencode=arch=compute_52,code=compute_52 \
-gencode=arch=compute_60,code=compute_60 \
-gencode=arch=compute_61,code=compute_61 \
-gencode=arch=compute_70,code=compute_70 \
-gencode=arch=compute_72,code=compute_72 \
-gencode=arch=compute_75,code=compute_75" \
conda build faiss-gpu --variants '{ "cudatoolkit": "10.0" }' \
--no-anaconda-upload -c pytorch --no-test
CONDA_BUILD_SYSROOT:
- /opt/MacOSX10.9.sdk # [osx]
python:
- 2.7
- 3.5
- 3.6
- 3.7
./configure --with-cuda=$CUDA_ROOT --with-cuda-arch="$CUDA_ARCH"
make -j $CPU_COUNT
cd python
make
$PYTHON setup.py install --single-version-externally-managed --record=record.txt
cxx_compiler_version:
- 5.4
cudatoolkit:
- 8.0
- 9.0
- 9.2
- 10.0
# - 10.1 # cudatoolkit 10.1 packages are not yet available on Anaconda.
pin_run_as_build:
cudatoolkit:
max_pin: x.x
package:
name: faiss-gpu
version: "{{ FAISS_BUILD_VERSION }}"
source:
git_url: ../../
requirements:
build:
- {{ compiler('cxx') }}
- llvm-openmp # [osx]
- setuptools
- swig
host:
- python {{ python }}
- intel-openmp # [osx]
- numpy 1.11.*
- mkl >=2018
- cudatoolkit {{ cudatoolkit }}
run:
- python {{ python }}
- intel-openmp # [osx]
- numpy >=1.11
- mkl >=2018
- blas=*=mkl
- {{ pin_compatible('cudatoolkit') }}
build:
number: {{ FAISS_BUILD_NUMBER }}
script_env:
- CUDA_ROOT
- CUDA_ARCH
about:
home: https://github.com/facebookresearch/faiss
license: BSD 3-Clause
license_family: BSD
license_file: LICENSE
summary: A library for efficient similarity search and clustering of dense vectors.
import faiss
import numpy as np
d = 128
n = 100
rs = np.random.RandomState(1337)
x = rs.rand(n, d).astype(np.float32)
index = faiss.IndexFlatL2(d)
res = faiss.StandardGpuResources()
gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
gpu_index.add(x)
D, I = gpu_index.search(x, 10)
./configure --without-cuda
make -j $CPU_COUNT
cd python
make
$PYTHON setup.py install --single-version-externally-managed --record=record.txt
package:
name: faiss-cpu
version: "{{ FAISS_BUILD_VERSION }}"
source:
git_url: ../../
requirements:
build:
- {{ compiler('cxx') }}
- llvm-openmp # [osx]
- setuptools
- swig
host:
- python {{ python }}
- intel-openmp # [osx]
- numpy 1.11.*
- mkl >=2018
run:
- python {{ python }}
- intel-openmp # [osx]
- numpy >=1.11
- blas=*=mkl
- mkl >=2018
build:
number: {{ FAISS_BUILD_NUMBER }}
about:
home: https://github.com/facebookresearch/faiss
license: BSD 3-Clause
license_family: BSD
license_file: LICENSE
summary: A library for efficient similarity search and clustering of dense vectors.
import faiss
import numpy as np
d = 128
# NOTE: BLAS kicks in only when n > distance_compute_blas_threshold = 20
n = 100
rs = np.random.RandomState(1337)
x = rs.rand(n, d).astype(np.float32)
index = faiss.IndexFlatL2(d)
index.add(x)
D, I = index.search(x, 10)
......@@ -645,6 +645,7 @@ build_cpu
build
OPENMP_CXXFLAGS
LIBOBJS
CUDA_ARCH
CUDA_PREFIX
NVCC_LIBS
NVCC_LDFLAGS
......@@ -655,9 +656,7 @@ CXXCPP
NVCC
SWIG
NUMPY_INCLUDE
PYTHON_LDFLAGS
PYTHON_CFLAGS
PYTHON_CFG
PYTHON
MKDIR_P
SET_MAKE
......@@ -692,7 +691,6 @@ infodir
docdir
oldincludedir
includedir
runstatedir
localstatedir
sharedstatedir
sysconfdir
......@@ -716,9 +714,9 @@ ac_subst_files=''
ac_user_opts='
enable_option_checking
with_python
with_python_config
with_swig
with_cuda
with_cuda_arch
enable_openmp
with_blas
with_lapack
......@@ -774,7 +772,6 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
......@@ -1027,15 +1024,6 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
-runstatedir | --runstatedir | --runstatedi | --runstated \
| --runstate | --runstat | --runsta | --runst | --runs \
| --run | --ru | --r)
ac_prev=runstatedir ;;
-runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
| --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
| --run=* | --ru=* | --r=*)
runstatedir=$ac_optarg ;;
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
......@@ -1173,7 +1161,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
libdir localedir mandir runstatedir
libdir localedir mandir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
......@@ -1326,7 +1314,6 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
--runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
......@@ -1367,10 +1354,10 @@ Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-python=<bin> use Python binary <bin>
--with-python-config=<bin>
use Python config binary <bin>
--with-swig=<bin> use SWIG binary <bin>
--with-cuda=<prefix> prefix of the CUDA installation
--with-cuda-arch=<gencodes>
device specific -gencode flags
--with-blas=<lib> use BLAS library <lib>
--with-lapack=<lib> use LAPACK library <lib>
......@@ -4143,88 +4130,17 @@ fi
fa_python_bin=$PYTHON
# Check whether --with-python-config was given.
if test "${with_python_config+set}" = set; then :
withval=$with_python_config;
fi
case $with_python_config in
"") PYTHON_CFG_BIN="${PYTHON_BIN}-config" ;;
*) PYTHON_CFG_BIN="$with_python_config"
esac
# Extract the first word of "$PYTHON_CFG_BIN", so it can be a program name with args.
set dummy $PYTHON_CFG_BIN; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
if ${ac_cv_prog_PYTHON_CFG+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$PYTHON_CFG"; then
ac_cv_prog_PYTHON_CFG="$PYTHON_CFG" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_PYTHON_CFG="$PYTHON_CFG_BIN"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
done
IFS=$as_save_IFS
fi
fi
PYTHON_CFG=$ac_cv_prog_PYTHON_CFG
if test -n "$PYTHON_CFG"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $PYTHON_CFG" >&5
$as_echo "$PYTHON_CFG" >&6; }
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
fa_python_cfg_bin=$PYTHON_CFG
if test x$fa_python_cfg_bin != x; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Python C flags" >&5
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Python C flags" >&5
$as_echo_n "checking for Python C flags... " >&6; }
fa_python_cflags=`${PYTHON_CFG} --includes`
if test x"$fa_python_cflags" == x; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: You won't be able to build the python interface." >&5
$as_echo "$as_me: WARNING: You won't be able to build the python interface." >&2;}
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $fa_python_cflags" >&5
fa_python_cflags=`$PYTHON -c "
import sysconfig
paths = ['-I' + sysconfig.get_path(p) for p in ['include', 'platinclude']]
print(' '.join(paths))"`
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $fa_python_cflags" >&5
$as_echo "$fa_python_cflags" >&6; }
PYTHON_CFLAGS=$fa_python_cflags
PYTHON_CFLAGS="$PYTHON_CFLAGS $fa_python_cflags"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Python ld flags" >&5
$as_echo_n "checking for Python ld flags... " >&6; }
fa_python_ldflags=`${PYTHON_CFG} --ldflags`
if test x"$fa_python_ldflags" == x; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $fa_python_ldflags" >&5
$as_echo "$fa_python_ldflags" >&6; }
PYTHON_LDFLAGS=$fa_python_ldflags
fi
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: You won't be able to build the python interface." >&5
$as_echo "$as_me: WARNING: You won't be able to build the python interface." >&2;}
fi
if test x$PYTHON != x; then
......@@ -4266,12 +4182,8 @@ if test "${with_swig+set}" = set; then :
fi
case $with_swig in
"") SWIG_BIN=swig ;;
*) SWIG_BIN="$with_swig"
esac
# Extract the first word of "$SWIG_BIN", so it can be a program name with args.
set dummy $SWIG_BIN; ac_word=$2
"") # Extract the first word of "swig", so it can be a program name with args.
set dummy swig; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
if ${ac_cv_prog_SWIG+:} false; then :
......@@ -4287,7 +4199,7 @@ do
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_SWIG="$SWIG_BIN"
ac_cv_prog_SWIG="swig"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
......@@ -4306,6 +4218,9 @@ else
$as_echo "no" >&6; }
fi
;;
*) SWIG="$with_swig"
esac
......@@ -4711,13 +4626,62 @@ if test "${with_cuda+set}" = set; then :
withval=$with_cuda;
fi
case $with_cuda in
"") cuda_prefix=/usr/local/cuda ;;
*) cuda_prefix="$with_cuda"
esac
# Extract the first word of ""nvcc"", so it can be a program name with args.
set dummy "nvcc"; ac_word=$2
# Check whether --with-cuda-arch was given.
if test "${with_cuda_arch+set}" = set; then :
withval=$with_cuda_arch;
else
with_cuda_arch=default
fi
if test x$with_cuda != xno; then
if test x$with_cuda != x; then
cuda_prefix=$with_cuda
# Extract the first word of "nvcc", so it can be a program name with args.
set dummy nvcc; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
if ${ac_cv_prog_NVCC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$NVCC"; then
ac_cv_prog_NVCC="$NVCC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $cuda_prefix/bin
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_NVCC="$cuda_prefix/bin/nvcc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
done
done
IFS=$as_save_IFS
fi
fi
NVCC=$ac_cv_prog_NVCC
if test -n "$NVCC"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5
$as_echo "$NVCC" >&6; }
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
for ac_prog in nvcc /usr/local/cuda/bin/nvcc
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
if ${ac_cv_prog_NVCC+:} false; then :
......@@ -4727,13 +4691,13 @@ else
ac_cv_prog_NVCC="$NVCC" # Let the user override the test.
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in "$cuda_prefix/bin"
for as_dir in $PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_NVCC=""$cuda_prefix/bin/nvcc""
ac_cv_prog_NVCC="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
......@@ -4741,7 +4705,6 @@ done
done
IFS=$as_save_IFS
test -z "$ac_cv_prog_NVCC" && ac_cv_prog_NVCC=""""
fi
fi
NVCC=$ac_cv_prog_NVCC
......@@ -4754,16 +4717,37 @@ $as_echo "no" >&6; }
fi
fa_nvcc_bin=$NVCC
test -n "$NVCC" && break
done
if test "x$NVCC" == "x/usr/local/cuda/bin/nvcc"; then
cuda_prefix="/usr/local/cuda"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
else
cuda_prefix=""
NVCC_CPPFLAGS=""
NVCC_LDFLAGS=""
fi
fi
if test "x$NVCC" == x; then
as_fn_error $? "Couldn't find nvcc" "$LINENO" 5
fi
if test "x$with_cuda_arch" == xdefault; then
with_cuda_arch="-gencode=arch=compute_35,code=compute_35 \\
-gencode=arch=compute_52,code=compute_52 \\
-gencode=arch=compute_60,code=compute_60 \\
-gencode=arch=compute_61,code=compute_61 \\
-gencode=arch=compute_70,code=compute_70 \\
-gencode=arch=compute_75,code=compute_75"
fi
if test x$fa_nvcc_bin != x; then
fa_save_CPPFLAGS="$CPPFLAGS"
fa_save_LDFLAGS="$LDFLAGS"
fa_save_LIBS="$LIBS"
NVCC_CPPFLAGS="-I$cuda_prefix/include"
NVCC_LDFLAGS="-L$cuda_prefix/lib64"
CPPFLAGS="$NVCC_CPPFLAGS $CPPFLAGS"
LDFLAGS="$NVCC_LDFLAGS $LDFLAGS"
......@@ -4778,56 +4762,6 @@ See \`config.log' for more details" "$LINENO" 5; }
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cuInit in -lcuda" >&5
$as_echo_n "checking for cuInit in -lcuda... " >&6; }
if ${ac_cv_lib_cuda_cuInit+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lcuda $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char cuInit ();
int
main ()
{
return cuInit ();
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_link "$LINENO"; then :
ac_cv_lib_cuda_cuInit=yes
else
ac_cv_lib_cuda_cuInit=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cuda_cuInit" >&5
$as_echo "$ac_cv_lib_cuda_cuInit" >&6; }
if test "x$ac_cv_lib_cuda_cuInit" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LIBCUDA 1
_ACEOF
LIBS="-lcuda $LIBS"
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error $? "Couldn't find libcuda
See \`config.log' for more details" "$LINENO" 5; }
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cublasAlloc in -lcublas" >&5
$as_echo_n "checking for cublasAlloc in -lcublas... " >&6; }
if ${ac_cv_lib_cublas_cublasAlloc+:} false; then :
......@@ -4935,8 +4869,6 @@ fi
CPPFLAGS="$fa_save_CPPFLAGS"
LDFLAGS="$fa_save_LDFLAGS"
LIBS="$fa_save_LIBS"
else
cuda_prefix=""
fi
......@@ -4945,6 +4877,8 @@ fi
CUDA_PREFIX=$cuda_prefix
CUDA_ARCH=$with_cuda_arch
......@@ -5474,8 +5408,6 @@ main ()
if (*(data + i) != *(data3 + i))
return 14;
close (fd);
free (data);
free (data3);
return 0;
}
_ACEOF
......
......@@ -15,8 +15,8 @@ all: $(DEMOS)
clean:
rm -f $(DEMOS)
%: %.cpp ../libfaiss.a
$(CXX) -o $@ $(CXXFLAGS) $^ $(LDFLAGS) $(LIBS)
%: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) -lfaiss
.PHONY: all clean
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.
-include ../makefile.inc
all: libgpufaiss.a libgpufaiss.$(SHAREDEXT)
CPPOBJ = GpuResources.o \
IndexProxy.o \
StandardGpuResources.o \
GpuAutoTune.o \
GpuClonerOptions.o \
impl/RemapIndices.o \
utils/DeviceMemory.o \
utils/StackDeviceMemory.o \
utils/DeviceUtils.o \
utils/Timer.o \
utils/MemorySpace.o \
utils/WorkerThread.o
INS = 1 32 64 128 256 F512 T512 F1024 T1024
CUOBJ = impl/BinaryDistance.o \
impl/BinaryFlatIndex.o \
impl/BroadcastSum.o \
impl/Distance.o \
impl/FlatIndex.o \
impl/InvertedListAppend.o \
impl/IVFBase.o \
impl/IVFFlat.o \
impl/IVFFlatScan.o \
impl/IVFPQ.o \
impl/IVFUtils.o \
impl/IVFUtilsSelect1.o \
impl/IVFUtilsSelect2.o \
impl/L2Norm.o \
impl/L2Select.o \
impl/PQCodeDistances.o \
impl/PQScanMultiPassNoPrecomputed.o \
impl/PQScanMultiPassPrecomputed.o \
impl/VectorResidual.o \
GpuDistance.o \
GpuIndex.o \
GpuIndexBinaryFlat.o \
GpuIndexFlat.o \
GpuIndexIVF.o \
GpuIndexIVFFlat.o \
GpuIndexIVFPQ.o \
utils/Float16.o \
utils/MatrixMult.o \
utils/BlockSelectFloat.o \
utils/BlockSelectHalf.o \
utils/WarpSelectFloat.o \
utils/WarpSelectHalf.o \
utils/nvidia/fp16_emu.o \
$(foreach v,$(INS), \
utils/blockselect/BlockSelectHalf$(v).o \
utils/blockselect/BlockSelectFloat$(v).o \
utils/warpselect/WarpSelectHalf$(v).o \
utils/warpselect/WarpSelectFloat$(v).o \
)
%.o: %.cpp
$(CXX) $(CXXFLAGS) $(CPUFLAGS) -c $< -o $@ $(CUDACFLAGS)
%.o: %.cu
$(NVCC) $(NVCCFLAGS) -g -O3 -c $< -o $@
libgpufaiss.a: $(CPPOBJ) $(CUOBJ)
ar r $@ $^
libgpufaiss.$(SHAREDEXT): $(CPPOBJ) $(CUOBJ)
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) $(NVCCLDFLAGS) \
-o libgpufaiss.$(SHAREDEXT) $^ $(LIBS) $(NVCCLIBS)
clean:
rm -rf *.o impl/*.o utils/*.o libgpufaiss.a \
libgpufaiss.$(SHAREDEXT) \
-include depend
depend:
for i in $(patsubst %.o,%.cpp,$(CPPOBJ)) \
$(patsubst %.o,%.cu,$(CUOBJ)); do \
$(CXXCPP) $(CPPFLAGS) -x c++ -MM $$i; \
done > depend
install: libgpufaiss.a libgpufaiss.$(SHAREDEXT) installdirs
cp libgpufaiss.a libgpufaiss.$(SHAREDEXT) $(DESTDIR)$(libdir)
cp *.h $(DESTDIR)$(includedir)/faiss/gpu
cp --parents **/**.h $(DESTDIR)$(includedir)/faiss/gpu
installdirs:
$(MKDIR_P) $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss/gpu
.PHONY: all clean
......@@ -6,13 +6,45 @@
-include ../../makefile.inc
%.o: %.cpp
$(CXX) $(CUDACFLAGS) -o $@ -c $^
TESTS_SRC = TestGpuIndexFlat.cpp TestGpuIndexIVFPQ.cpp \
TestGpuIndexBinaryFlat.cpp TestGpuIndexIVFFlat.cpp TestGpuMemoryException.cpp
CUDA_TESTS_SRC = TestGpuSelect.cu
demo_ivfpq_indexing_gpu: demo_ivfpq_indexing_gpu.o ../libgpufaiss.a ../../libfaiss.a
$(CXX) $(LDFLAGS) $(NVCCLDFLAGS) -o $@ $^ $(LIBS) $(NVCCLIBS)
TESTS_BIN = $(TESTS_SRC:.cpp=)
CUDA_TESTS_BIN = $(CUDA_TESTS_SRC:.cu=)
# test_gpu_index.py test_pytorch_faiss.py
run: $(TESTS_BIN) $(CUDA_TESTS_BIN)
for t in $(TESTS_BIN) $(CUDA_TESTS_BIN); do ./$$t; done
%.o: %.cu gtest
$(NVCC) $(NVCCFLAGS) -g -O3 -o $@ -c $< -Igtest/include
%.o: %.cpp gtest
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -o $@ -c $< -Igtest/include
%: %.o TestUtils.o ../../libfaiss.a gtest/make/gtest.a
$(CXX) -o $@ $^ $(LDFLAGS) $(LIBS)
demo_ivfpq_indexing_gpu: demo_ivfpq_indexing_gpu.o ../../libfaiss.a
$(CXX) -o $@ $^ $(LDFLAGS) $(LIBS)
demo_ivfpq_indexing_gpu.o: demo_ivfpq_indexing_gpu.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -o $@ -c $^
gtest/make/gtest.a: gtest
$(MAKE) -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest.a
gtest:
curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | tar xz && \
mv googletest-release-1.8.0/googletest gtest && \
rm -rf googletest-release-1.8.0
clean:
rm -f *.o demo_ivfpq_indexing_gpu
rm -f *.o $(TESTS_BIN)
rm -rf gtest
rm -f demo_ivfpq_indexing_gpu
.PHONY: clean
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD+Patents license found in the
* LICENSE file in the root directory of this source tree.
*/
#include "DeviceUtils.h"
#include "../../FaissAssert.h"
#include <mutex>
#include <unordered_map>
namespace faiss { namespace gpu {
int getCurrentDevice() {
int dev = -1;
CUDA_VERIFY(cudaGetDevice(&dev));
FAISS_ASSERT(dev != -1);
return dev;
}
void setCurrentDevice(int device) {
CUDA_VERIFY(cudaSetDevice(device));
}
int getNumDevices() {
int numDev = -1;
CUDA_VERIFY(cudaGetDeviceCount(&numDev));
FAISS_ASSERT(numDev != -1);
return numDev;
}
void synchronizeAllDevices() {
for (int i = 0; i < getNumDevices(); ++i) {
DeviceScope scope(i);
CUDA_VERIFY(cudaDeviceSynchronize());
}
}
const cudaDeviceProp& getDeviceProperties(int device) {
static std::mutex mutex;
static std::unordered_map<int, cudaDeviceProp> properties;
std::lock_guard<std::mutex> guard(mutex);
auto it = properties.find(device);
if (it == properties.end()) {
cudaDeviceProp prop;
CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
properties[device] = prop;
it = properties.find(device);
}
return it->second;
}
const cudaDeviceProp& getCurrentDeviceProperties() {
return getDeviceProperties(getCurrentDevice());
}
int getMaxThreads(int device) {
return getDeviceProperties(device).maxThreadsPerBlock;
}
int getMaxThreadsCurrentDevice() {
return getMaxThreads(getCurrentDevice());
}
size_t getMaxSharedMemPerBlock(int device) {
return getDeviceProperties(device).sharedMemPerBlock;
}
size_t getMaxSharedMemPerBlockCurrentDevice() {
return getMaxSharedMemPerBlock(getCurrentDevice());
}
int getDeviceForAddress(const void* p) {
if (!p) {
return -1;
}
cudaPointerAttributes att;
cudaError_t err = cudaPointerGetAttributes(&att, p);
FAISS_ASSERT(err == cudaSuccess ||
err == cudaErrorInvalidValue);
if (err == cudaErrorInvalidValue) {
// Make sure the current thread error status has been reset
err = cudaGetLastError();
FAISS_ASSERT(err == cudaErrorInvalidValue);
return -1;
} else if (att.memoryType == cudaMemoryTypeHost) {
return -1;
} else {
return att.device;
}
}
bool getFullUnifiedMemSupport(int device) {
const auto& prop = getDeviceProperties(device);
return (prop.major >= 6);
}
bool getFullUnifiedMemSupportCurrentDevice() {
return getFullUnifiedMemSupport(getCurrentDevice());
}
DeviceScope::DeviceScope(int device) {
prevDevice_ = getCurrentDevice();
if (prevDevice_ != device) {
setCurrentDevice(device);
} else {
prevDevice_ = -1;
}
}
DeviceScope::~DeviceScope() {
if (prevDevice_ != -1) {
setCurrentDevice(prevDevice_);
}
}
CublasHandleScope::CublasHandleScope() {
auto blasStatus = cublasCreate(&blasHandle_);
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
CublasHandleScope::~CublasHandleScope() {
auto blasStatus = cublasDestroy(blasHandle_);
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
CudaEvent::CudaEvent(cudaStream_t stream)
: event_(0) {
CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
CUDA_VERIFY(cudaEventRecord(event_, stream));
}
CudaEvent::CudaEvent(CudaEvent&& event) noexcept
: event_(std::move(event.event_)) {
event.event_ = 0;
}
CudaEvent::~CudaEvent() {
if (event_) {
CUDA_VERIFY(cudaEventDestroy(event_));
}
}
CudaEvent&
CudaEvent::operator=(CudaEvent&& event) noexcept {
event_ = std::move(event.event_);
event.event_ = 0;
return *this;
}
void
CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
}
void
CudaEvent::cpuWaitOnEvent() {
CUDA_VERIFY(cudaEventSynchronize(event_));
}
} } // namespace
......@@ -6,27 +6,20 @@
CXX = @CXX@
CXXCPP = @CXXCPP@
# TODO: Investigate the LAPACKE wrapper for LAPACK, which defines the correct
# type for FORTRAN integers.
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@
CXXFLAGS = -fPIC @OPENMP_CXXFLAGS@ @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPPFLAGS = -DFINTEGER=int @CPPFLAGS@ @OPENMP_CXXFLAGS@ @NVCC_CPPFLAGS@
CXXFLAGS = -fPIC @ARCH_CXXFLAGS@ -Wno-sign-compare @CXXFLAGS@
CPUFLAGS = @ARCH_CPUFLAGS@
LDFLAGS = @OPENMP_LDFLAGS@ @LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@
LDFLAGS = @OPENMP_CXXFLAGS@ @LDFLAGS@ @NVCC_LDFLAGS@
LIBS = @BLAS_LIBS@ @LAPACK_LIBS@ @LIBS@ @NVCC_LIBS@
PYTHONCFLAGS = @PYTHON_CFLAGS@ -I@NUMPY_INCLUDE@
NVCC = @NVCC@
NVCCLDFLAGS = @NVCC_LDFLAGS@
NVCCLIBS = @NVCC_LIBS@
CUDAROOT = @CUDA_PREFIX@
CUDACFLAGS = @NVCC_CPPFLAGS@
NVCCFLAGS = -I $(CUDAROOT)/targets/x86_64-linux/include/ \
CUDA_ROOT = @CUDA_PREFIX@
CUDA_ARCH = @CUDA_ARCH@
NVCCFLAGS = -I $(CUDA_ROOT)/targets/x86_64-linux/include/ \
-Xcompiler -fPIC \
-Xcudafe --diag_suppress=unrecognized_attribute \
-gencode arch=compute_35,code="compute_35" \
-gencode arch=compute_52,code="compute_52" \
-gencode arch=compute_60,code="compute_60" \
-gencode arch=compute_61,code="compute_61" \
$(CUDA_ARCH) \
-lineinfo \
-ccbin $(CXX) -DFAISS_USE_FLOAT16
......@@ -43,6 +36,7 @@ endif
MKDIR_P = @MKDIR_P@
PYTHON = @PYTHON@
SWIG = @SWIG@
AR ?= ar
prefix ?= @prefix@
exec_prefix ?= @exec_prefix@
......
......@@ -6,52 +6,33 @@
-include ../makefile.inc
HEADERS = $(wildcard ../*.h)
ifneq ($(strip $(NVCC)),)
SWIGFLAGS = -DGPU_WRAPPER
endif
all: cpu build
#############################
# CPU
cpu: _swigfaiss.so
all: build
# Also silently generates swigfaiss.py.
swigfaiss_wrap.cpp: swigfaiss.swig $(HEADERS)
$(SWIG) -python -c++ -Doverride= -I../ -o $@ $<
swigfaiss.cpp: swigfaiss.swig ../libfaiss.a
$(SWIG) -python -c++ -Doverride= -I../ $(SWIGFLAGS) -o $@ $<
swigfaiss_wrap.o: swigfaiss_wrap.cpp
$(CXX) $(CXXFLAGS) $(CPUFLAGS) $(PYTHONCFLAGS) -I../ -c $< -o $@
swigfaiss.o: swigfaiss.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) $(PYTHONCFLAGS) \
-I../ -c $< -o $@
# Extension is .so even on OSX.
_swigfaiss.so: swigfaiss_wrap.o ../libfaiss.a
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
#############################
# GPU
gpu: _swigfaiss_gpu.so
# Also silently generates swigfaiss.py.
swigfaiss_gpu_wrap.cpp: swigfaiss.swig
$(SWIG) -python -c++ -Doverride= -I../ -DGPU_WRAPPER -o $@ $<
swigfaiss_gpu_wrap.o: swigfaiss_gpu_wrap.cpp
$(NVCC) $(NVCCFLAGS) $(PYTHONCFLAGS) -I../ -c $< -o $@
_swigfaiss_gpu.so: swigfaiss_gpu_wrap.o ../gpu/libgpufaiss.a ../libfaiss.a
$(CXX) $(SHAREDFLAGS) $(NVCCLDFLAGS) $(LDFLAGS) -o $@ $^ $(NVCCLIBS) $(LIBS)
_swigfaiss.so: swigfaiss.o ../libfaiss.a
$(CXX) $(SHAREDFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
build: cpu
build: _swigfaiss.so faiss.py
$(PYTHON) setup.py build
install: build
$(PYTHON) setup.py install
clean:
rm -f swigfaiss_wrap.cpp swigfaiss_gpu_wrap.cpp
rm -f swigfaiss.py swigfaiss_gpu.py
rm -f _swigfaiss.so _swigfaiss_gpu.so
rm -f swigfaiss.{cpp,o,py}
rm -f _swigfaiss.so
rm -rf build/
.PHONY: all build clean cpu gpu install
.PHONY: all build clean install
......@@ -16,16 +16,7 @@ import pdb
# we import * so that the symbol X can be accessed as faiss.X
try:
from swigfaiss_gpu import *
except ImportError as e:
if 'No module named' not in e.args[0]:
# swigfaiss_gpu is there but failed to load: Warn user about it.
sys.stderr.write("Failed to load GPU Faiss: %s\n" % e.args[0])
sys.stderr.write("Faiss falling back to CPU-only.\n")
from swigfaiss import *
from .swigfaiss import *
__version__ = "%d.%d.%d" % (FAISS_VERSION_MAJOR,
FAISS_VERSION_MINOR,
......
......@@ -8,8 +8,7 @@ here = os.path.abspath(os.path.dirname(__file__))
check_fpath = os.path.join("_swigfaiss.so")
if not os.path.exists(check_fpath):
print("Could not find {}".format(check_fpath))
print("Have you run `make` and `make py` "
"(and optionally `cd gpu && make && make py && cd ..`)?")
print("Have you run `make` and `make -C python`?")
# make the faiss python package dir
shutil.rmtree("faiss", ignore_errors=True)
......@@ -17,27 +16,22 @@ os.mkdir("faiss")
shutil.copyfile("faiss.py", "faiss/__init__.py")
shutil.copyfile("swigfaiss.py", "faiss/swigfaiss.py")
shutil.copyfile("_swigfaiss.so", "faiss/_swigfaiss.so")
try:
shutil.copyfile("_swigfaiss_gpu.so", "faiss/_swigfaiss_gpu.so")
shutil.copyfile("swigfaiss_gpu.py", "faiss/swigfaiss_gpu.py")
except:
pass
long_description="""
Faiss is a library for efficient similarity search and clustering of dense
Faiss is a library for efficient similarity search and clustering of dense
vectors. It contains algorithms that search in sets of vectors of any size,
up to ones that possibly do not fit in RAM. It also contains supporting
code for evaluation and parameter tuning. Faiss is written in C++ with
complete wrappers for Python/numpy. Some of the most useful algorithms
up to ones that possibly do not fit in RAM. It also contains supporting
code for evaluation and parameter tuning. Faiss is written in C++ with
complete wrappers for Python/numpy. Some of the most useful algorithms
are implemented on the GPU. It is developed by Facebook AI Research.
"""
setup(
name='faiss',
version='0.1',
version='1.5.1',
description='A library for efficient similarity search and clustering of dense vectors',
long_description=long_description,
url='https://github.com/facebookresearch/faiss',
author='Matthijs Douze, Jeff Johnson, Herve Jegou',
author='Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini',
author_email='matthijs@fb.com',
license='BSD',
keywords='search nearest neighbors',
......
......@@ -15,11 +15,7 @@
// SWIGPYTHON: Python-specific code
// GPU_WRAPPER: also compile interfaces for GPU.
#ifdef GPU_WRAPPER
%module swigfaiss_gpu;
#else
%module swigfaiss;
#endif
// fbode SWIG fails on warnings, so make them non fatal
#pragma SWIG nowarn=321
......@@ -328,16 +324,6 @@ int get_num_gpus()
%include "gpu/GpuIndexBinaryFlat.h"
%include "gpu/GpuDistance.h"
#ifdef SWIGLUA
/// in Lua, swigfaiss_gpu is known as swigfaiss
%luacode {
local swigfaiss = swigfaiss_gpu
}
#endif
#endif
......
......@@ -7,21 +7,22 @@
-include ../makefile.inc
TESTS_SRC = $(wildcard *.cpp)
TESTS = $(TESTS_SRC:.cpp=.o)
TESTS_OBJ = $(TESTS_SRC:.cpp=.o)
all: run
run: tests
./tests
tests: $(TESTS) ../libfaiss.a gtest/make/gtest_main.a
tests: $(TESTS_OBJ) ../libfaiss.a gtest/make/gtest_main.a
$(CXX) -o $@ $^ $(LDFLAGS) $(LIBS)
%.o: %.cpp gtest
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c -o $@ $< -Igtest/include -I../..
gtest/make/gtest_main.a: gtest
make -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest_main.a
$(MAKE) -C gtest/make CXX="$(CXX)" CXXFLAGS="$(CXXFLAGS)" gtest_main.a
gtest:
curl -L https://github.com/google/googletest/archive/release-1.8.0.tar.gz | tar xz && \
......@@ -29,8 +30,8 @@ gtest:
rm -rf googletest-release-1.8.0
clean:
rm -f test_runner
rm -f *.o
rm -f tests
rm -f $(TESTS_OBJ)
rm -rf gtest
......