@inproceedings{e005d658e880464a8117b182e969c44e,
title = "Multi-probe LSH: Efficient indexing for high-dimensional similarity search",
abstract = "Similarity indices for high-dimensional data are very desirable for building content-based search systems for feature-rich data such as audio, images, videos, and other sensor data. Recently, locality sensitive hashing (LSH) and its variations have been proposed as indexing techniques for approximate similarity search. A significant drawback of these approaches is the requirement for a large number of hash tables in order to achieve good search quality. This paper proposes a new indexing scheme called multi-probe LSH that overcomes this drawback. Multi-probe LSH is built on the well-known LSH technique, but it intelligently probes multiple buckets that are likely to contain query results in a hash table. Our method is inspired by and improves upon recent theoretical work on entropy-based LSH designed to reduce the space requirement of the basic LSH method. We have implemented the multi-probe LSH method and evaluated the implementation with two different high-dimensional datasets. Our evaluation shows that the multi-probe LSH method substantially improves upon previously proposed methods in both space and time efficiency. To achieve the same search quality, multi-probe LSH has a similar time-efficiency as the basic LSH method while reducing the number of hash tables by an order of magnitude. In comparison with the entropy-based LSH method, to achieve the same search quality, multi-probe LSH uses less query time and 5 to 8 times fewer number of hash tables.",
author = "Qin Lv and William Josephson and Zhe Wang and Moses Charikar and Kai Li",
year = "2007",
month = jan,
day = "1",
language = "English (US)",
series = "33rd International Conference on Very Large Data Bases, VLDB 2007 - Conference Proceedings",
publisher = "Association for Computing Machinery, Inc",
pages = "950--961",
editor = "Johannes Gehrke and Christoph Koch and Minos Garofalakis and Karl Aberer and Carl-Christian Kanne and Neuhold, {Erich J.} and Venkatesh Ganti and Wolfgang Klas and Chee-Yong Chan and Divesh Srivastava and Dana Florescu and Anand Deshpande",
booktitle = "33rd International Conference on Very Large Data Bases, VLDB 2007 - Conference Proceedings",
note = "33rd International Conference on Very Large Data Bases, VLDB 2007 ; Conference date: 23-09-2007 Through 27-09-2007",
}