% Conference paper in the ICMR 2012 proceedings. The proceedings title is stored
% in booktitle only; the conference dates are recorded as free text in note.
@inproceedings{247800b73c4948e69a235809e871d1ef,
  author    = {Dong, Wei and Wang, Zhe and Charikar, Moses and Li, Kai},
  title     = {High-confidence near-duplicate image detection},
  booktitle = {Proceedings of the 2nd {ACM} International Conference on Multimedia Retrieval, {ICMR} 2012},
  publisher = {Association for Computing Machinery},
  year      = {2012},
  doi       = {10.1145/2324796.2324798},
  isbn      = {9781450313292},
  language  = {English (US)},
  keywords  = {Entropy, Graph cut, Near-duplicate, Query expansion},
  abstract  = {In this paper, we propose two techniques for near-duplicate image detection at high confidence and large scale. First, we show that entropy-based filtering eliminates ambiguous SIFT features that cause most of the false positives, and enables claiming near-duplicity with a single match of the retained high-quality features. Second, we show that graph cut can be used for query expansion with a duplicity graph computed offline to substantially improve search quality. Evaluation with web images show that when combined with sketch embedding [6], our methods achieve false positive rate orders of magnitude lower than the standard visual word approach. We demonstrate the proposed techniques with a large-scale image search engine which, using indexing data structure offline computed with a Hadoop cluster, is capable of serving more than 50 million web images with a single commodity server.},
  note      = {2nd ACM International Conference on Multimedia Retrieval, ICMR 2012 ; Conference date: 05-06-2012 Through 08-06-2012},
}